File: | llvm/lib/Analysis/LoopAccessAnalysis.cpp |
Warning: | line 1341, column 38 Although the value stored to 'VF' is used in the enclosing expression, the value is never actually read from 'VF' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The implementation for the loop memory dependence that was originally |
10 | // developed for the loop vectorizer. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/Analysis/LoopAccessAnalysis.h" |
15 | #include "llvm/ADT/APInt.h" |
16 | #include "llvm/ADT/DenseMap.h" |
17 | #include "llvm/ADT/DepthFirstIterator.h" |
18 | #include "llvm/ADT/EquivalenceClasses.h" |
19 | #include "llvm/ADT/PointerIntPair.h" |
20 | #include "llvm/ADT/STLExtras.h" |
21 | #include "llvm/ADT/SetVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallSet.h" |
24 | #include "llvm/ADT/SmallVector.h" |
25 | #include "llvm/ADT/iterator_range.h" |
26 | #include "llvm/Analysis/AliasAnalysis.h" |
27 | #include "llvm/Analysis/AliasSetTracker.h" |
28 | #include "llvm/Analysis/LoopAnalysisManager.h" |
29 | #include "llvm/Analysis/LoopInfo.h" |
30 | #include "llvm/Analysis/MemoryLocation.h" |
31 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
32 | #include "llvm/Analysis/ScalarEvolution.h" |
33 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
34 | #include "llvm/Analysis/TargetLibraryInfo.h" |
35 | #include "llvm/Analysis/ValueTracking.h" |
36 | #include "llvm/Analysis/VectorUtils.h" |
37 | #include "llvm/IR/BasicBlock.h" |
38 | #include "llvm/IR/Constants.h" |
39 | #include "llvm/IR/DataLayout.h" |
40 | #include "llvm/IR/DebugLoc.h" |
41 | #include "llvm/IR/DerivedTypes.h" |
42 | #include "llvm/IR/DiagnosticInfo.h" |
43 | #include "llvm/IR/Dominators.h" |
44 | #include "llvm/IR/Function.h" |
45 | #include "llvm/IR/InstrTypes.h" |
46 | #include "llvm/IR/Instruction.h" |
47 | #include "llvm/IR/Instructions.h" |
48 | #include "llvm/IR/Operator.h" |
49 | #include "llvm/IR/PassManager.h" |
50 | #include "llvm/IR/Type.h" |
51 | #include "llvm/IR/Value.h" |
52 | #include "llvm/IR/ValueHandle.h" |
53 | #include "llvm/InitializePasses.h" |
54 | #include "llvm/Pass.h" |
55 | #include "llvm/Support/Casting.h" |
56 | #include "llvm/Support/CommandLine.h" |
57 | #include "llvm/Support/Debug.h" |
58 | #include "llvm/Support/ErrorHandling.h" |
59 | #include "llvm/Support/raw_ostream.h" |
60 | #include <algorithm> |
61 | #include <cassert> |
62 | #include <cstdint> |
63 | #include <cstdlib> |
64 | #include <iterator> |
65 | #include <utility> |
66 | #include <vector> |
67 | |
68 | using namespace llvm; |
69 | |
// Debug type string used by LLVM_DEBUG statements in this file.
#define DEBUG_TYPE "loop-accesses"
71 | |
72 | static cl::opt<unsigned, true> |
73 | VectorizationFactor("force-vector-width", cl::Hidden, |
74 | cl::desc("Sets the SIMD width. Zero is autoselect."), |
75 | cl::location(VectorizerParams::VectorizationFactor)); |
76 | unsigned VectorizerParams::VectorizationFactor; |
77 | |
78 | static cl::opt<unsigned, true> |
79 | VectorizationInterleave("force-vector-interleave", cl::Hidden, |
80 | cl::desc("Sets the vectorization interleave count. " |
81 | "Zero is autoselect."), |
82 | cl::location( |
83 | VectorizerParams::VectorizationInterleave)); |
84 | unsigned VectorizerParams::VectorizationInterleave; |
85 | |
86 | static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( |
87 | "runtime-memory-check-threshold", cl::Hidden, |
88 | cl::desc("When performing memory disambiguation checks at runtime do not " |
89 | "generate more than this number of comparisons (default = 8)."), |
90 | cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); |
91 | unsigned VectorizerParams::RuntimeMemoryCheckThreshold; |
92 | |
93 | /// The maximum iterations used to merge memory checks |
94 | static cl::opt<unsigned> MemoryCheckMergeThreshold( |
95 | "memory-check-merge-threshold", cl::Hidden, |
96 | cl::desc("Maximum number of comparisons done when trying to merge " |
97 | "runtime memory checks. (default = 100)"), |
98 | cl::init(100)); |
99 | |
100 | /// Maximum SIMD width. |
101 | const unsigned VectorizerParams::MaxVectorWidth = 64; |
102 | |
103 | /// We collect dependences up to this threshold. |
104 | static cl::opt<unsigned> |
105 | MaxDependences("max-dependences", cl::Hidden, |
106 | cl::desc("Maximum number of dependences collected by " |
107 | "loop-access analysis (default = 100)"), |
108 | cl::init(100)); |
109 | |
110 | /// This enables versioning on the strides of symbolically striding memory |
111 | /// accesses in code like the following. |
112 | /// for (i = 0; i < N; ++i) |
113 | /// A[i * Stride1] += B[i * Stride2] ... |
114 | /// |
115 | /// Will be roughly translated to |
116 | /// if (Stride1 == 1 && Stride2 == 1) { |
117 | /// for (i = 0; i < N; i+=4) |
118 | /// A[i:i+3] += ... |
119 | /// } else |
120 | /// ... |
121 | static cl::opt<bool> EnableMemAccessVersioning( |
122 | "enable-mem-access-versioning", cl::init(true), cl::Hidden, |
123 | cl::desc("Enable symbolic stride memory access versioning")); |
124 | |
125 | /// Enable store-to-load forwarding conflict detection. This option can |
126 | /// be disabled for correctness testing. |
127 | static cl::opt<bool> EnableForwardingConflictDetection( |
128 | "store-to-load-forwarding-conflict-detection", cl::Hidden, |
129 | cl::desc("Enable conflict detection in loop-access analysis"), |
130 | cl::init(true)); |
131 | |
132 | bool VectorizerParams::isInterleaveForced() { |
133 | return ::VectorizationInterleave.getNumOccurrences() > 0; |
134 | } |
135 | |
136 | Value *llvm::stripIntegerCast(Value *V) { |
137 | if (auto *CI = dyn_cast<CastInst>(V)) |
138 | if (CI->getOperand(0)->getType()->isIntegerTy()) |
139 | return CI->getOperand(0); |
140 | return V; |
141 | } |
142 | |
143 | const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, |
144 | const ValueToValueMap &PtrToStride, |
145 | Value *Ptr, Value *OrigPtr) { |
146 | const SCEV *OrigSCEV = PSE.getSCEV(Ptr); |
147 | |
148 | // If there is an entry in the map return the SCEV of the pointer with the |
149 | // symbolic stride replaced by one. |
150 | ValueToValueMap::const_iterator SI = |
151 | PtrToStride.find(OrigPtr ? OrigPtr : Ptr); |
152 | if (SI == PtrToStride.end()) |
153 | // For a non-symbolic stride, just return the original expression. |
154 | return OrigSCEV; |
155 | |
156 | Value *StrideVal = stripIntegerCast(SI->second); |
157 | |
158 | ScalarEvolution *SE = PSE.getSE(); |
159 | const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal)); |
160 | const auto *CT = |
161 | static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType())); |
162 | |
163 | PSE.addPredicate(*SE->getEqualPredicate(U, CT)); |
164 | auto *Expr = PSE.getSCEV(Ptr); |
165 | |
166 | LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEVdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"; } } while (false) |
167 | << " by: " << *Expr << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"; } } while (false); |
168 | return Expr; |
169 | } |
170 | |
171 | RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( |
172 | unsigned Index, RuntimePointerChecking &RtCheck) |
173 | : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End), |
174 | Low(RtCheck.Pointers[Index].Start) { |
175 | Members.push_back(Index); |
176 | } |
177 | |
178 | /// Calculate Start and End points of memory access. |
179 | /// Let's assume A is the first access and B is a memory access on N-th loop |
180 | /// iteration. Then B is calculated as: |
181 | /// B = A + Step*N . |
182 | /// Step value may be positive or negative. |
183 | /// N is a calculated back-edge taken count: |
184 | /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 |
185 | /// Start and End points are calculated in the following way: |
186 | /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt, |
187 | /// where SizeOfElt is the size of single memory access in bytes. |
188 | /// |
189 | /// There is no conflict when the intervals are disjoint: |
190 | /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) |
191 | void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, |
192 | unsigned DepSetId, unsigned ASId, |
193 | const ValueToValueMap &Strides, |
194 | PredicatedScalarEvolution &PSE) { |
195 | // Get the stride replaced scev. |
196 | const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); |
197 | ScalarEvolution *SE = PSE.getSE(); |
198 | |
199 | const SCEV *ScStart; |
200 | const SCEV *ScEnd; |
201 | |
202 | if (SE->isLoopInvariant(Sc, Lp)) |
203 | ScStart = ScEnd = Sc; |
204 | else { |
205 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc); |
206 | assert(AR && "Invalid addrec expression")((AR && "Invalid addrec expression") ? static_cast< void> (0) : __assert_fail ("AR && \"Invalid addrec expression\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 206, __PRETTY_FUNCTION__)); |
207 | const SCEV *Ex = PSE.getBackedgeTakenCount(); |
208 | |
209 | ScStart = AR->getStart(); |
210 | ScEnd = AR->evaluateAtIteration(Ex, *SE); |
211 | const SCEV *Step = AR->getStepRecurrence(*SE); |
212 | |
213 | // For expressions with negative step, the upper bound is ScStart and the |
214 | // lower bound is ScEnd. |
215 | if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) { |
216 | if (CStep->getValue()->isNegative()) |
217 | std::swap(ScStart, ScEnd); |
218 | } else { |
219 | // Fallback case: the step is not constant, but we can still |
220 | // get the upper and lower bounds of the interval by using min/max |
221 | // expressions. |
222 | ScStart = SE->getUMinExpr(ScStart, ScEnd); |
223 | ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); |
224 | } |
225 | // Add the size of the pointed element to ScEnd. |
226 | auto &DL = Lp->getHeader()->getModule()->getDataLayout(); |
227 | Type *IdxTy = DL.getIndexType(Ptr->getType()); |
228 | const SCEV *EltSizeSCEV = |
229 | SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType()); |
230 | ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); |
231 | } |
232 | |
233 | Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); |
234 | } |
235 | |
236 | SmallVector<RuntimePointerCheck, 4> |
237 | RuntimePointerChecking::generateChecks() const { |
238 | SmallVector<RuntimePointerCheck, 4> Checks; |
239 | |
240 | for (unsigned I = 0; I < CheckingGroups.size(); ++I) { |
241 | for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { |
242 | const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I]; |
243 | const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J]; |
244 | |
245 | if (needsChecking(CGI, CGJ)) |
246 | Checks.push_back(std::make_pair(&CGI, &CGJ)); |
247 | } |
248 | } |
249 | return Checks; |
250 | } |
251 | |
252 | void RuntimePointerChecking::generateChecks( |
253 | MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { |
254 | assert(Checks.empty() && "Checks is not empty")((Checks.empty() && "Checks is not empty") ? static_cast <void> (0) : __assert_fail ("Checks.empty() && \"Checks is not empty\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 254, __PRETTY_FUNCTION__)); |
255 | groupChecks(DepCands, UseDependencies); |
256 | Checks = generateChecks(); |
257 | } |
258 | |
259 | bool RuntimePointerChecking::needsChecking( |
260 | const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const { |
261 | for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I) |
262 | for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) |
263 | if (needsChecking(M.Members[I], N.Members[J])) |
264 | return true; |
265 | return false; |
266 | } |
267 | |
268 | /// Compare \p I and \p J and return the minimum. |
269 | /// Return nullptr in case we couldn't find an answer. |
270 | static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J, |
271 | ScalarEvolution *SE) { |
272 | const SCEV *Diff = SE->getMinusSCEV(J, I); |
273 | const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff); |
274 | |
275 | if (!C) |
276 | return nullptr; |
277 | if (C->getValue()->isNegative()) |
278 | return J; |
279 | return I; |
280 | } |
281 | |
282 | bool RuntimeCheckingPtrGroup::addPointer(unsigned Index) { |
283 | const SCEV *Start = RtCheck.Pointers[Index].Start; |
284 | const SCEV *End = RtCheck.Pointers[Index].End; |
285 | |
286 | // Compare the starts and ends with the known minimum and maximum |
287 | // of this set. We need to know how we compare against the min/max |
288 | // of the set in order to be able to emit memchecks. |
289 | const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE); |
290 | if (!Min0) |
291 | return false; |
292 | |
293 | const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE); |
294 | if (!Min1) |
295 | return false; |
296 | |
297 | // Update the low bound expression if we've found a new min value. |
298 | if (Min0 == Start) |
299 | Low = Start; |
300 | |
301 | // Update the high bound expression if we've found a new max value. |
302 | if (Min1 != End) |
303 | High = End; |
304 | |
305 | Members.push_back(Index); |
306 | return true; |
307 | } |
308 | |
309 | void RuntimePointerChecking::groupChecks( |
310 | MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { |
311 | // We build the groups from dependency candidates equivalence classes |
312 | // because: |
313 | // - We know that pointers in the same equivalence class share |
314 | // the same underlying object and therefore there is a chance |
315 | // that we can compare pointers |
316 | // - We wouldn't be able to merge two pointers for which we need |
317 | // to emit a memcheck. The classes in DepCands are already |
318 | // conveniently built such that no two pointers in the same |
319 | // class need checking against each other. |
320 | |
321 | // We use the following (greedy) algorithm to construct the groups |
322 | // For every pointer in the equivalence class: |
323 | // For each existing group: |
324 | // - if the difference between this pointer and the min/max bounds |
325 | // of the group is a constant, then make the pointer part of the |
326 | // group and update the min/max bounds of that group as required. |
327 | |
328 | CheckingGroups.clear(); |
329 | |
330 | // If we need to check two pointers to the same underlying object |
331 | // with a non-constant difference, we shouldn't perform any pointer |
332 | // grouping with those pointers. This is because we can easily get |
333 | // into cases where the resulting check would return false, even when |
334 | // the accesses are safe. |
335 | // |
336 | // The following example shows this: |
337 | // for (i = 0; i < 1000; ++i) |
338 | // a[5000 + i * m] = a[i] + a[i + 9000] |
339 | // |
340 | // Here grouping gives a check of (5000, 5000 + 1000 * m) against |
341 | // (0, 10000) which is always false. However, if m is 1, there is no |
342 | // dependence. Not grouping the checks for a[i] and a[i + 9000] allows |
343 | // us to perform an accurate check in this case. |
344 | // |
345 | // The above case requires that we have an UnknownDependence between |
346 | // accesses to the same underlying object. This cannot happen unless |
347 | // FoundNonConstantDistanceDependence is set, and therefore UseDependencies |
348 | // is also false. In this case we will use the fallback path and create |
349 | // separate checking groups for all pointers. |
350 | |
351 | // If we don't have the dependency partitions, construct a new |
352 | // checking pointer group for each pointer. This is also required |
353 | // for correctness, because in this case we can have checking between |
354 | // pointers to the same underlying object. |
355 | if (!UseDependencies) { |
356 | for (unsigned I = 0; I < Pointers.size(); ++I) |
357 | CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this)); |
358 | return; |
359 | } |
360 | |
361 | unsigned TotalComparisons = 0; |
362 | |
363 | DenseMap<Value *, unsigned> PositionMap; |
364 | for (unsigned Index = 0; Index < Pointers.size(); ++Index) |
365 | PositionMap[Pointers[Index].PointerValue] = Index; |
366 | |
367 | // We need to keep track of what pointers we've already seen so we |
368 | // don't process them twice. |
369 | SmallSet<unsigned, 2> Seen; |
370 | |
371 | // Go through all equivalence classes, get the "pointer check groups" |
372 | // and add them to the overall solution. We use the order in which accesses |
373 | // appear in 'Pointers' to enforce determinism. |
374 | for (unsigned I = 0; I < Pointers.size(); ++I) { |
375 | // We've seen this pointer before, and therefore already processed |
376 | // its equivalence class. |
377 | if (Seen.count(I)) |
378 | continue; |
379 | |
380 | MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue, |
381 | Pointers[I].IsWritePtr); |
382 | |
383 | SmallVector<RuntimeCheckingPtrGroup, 2> Groups; |
384 | auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access)); |
385 | |
386 | // Because DepCands is constructed by visiting accesses in the order in |
387 | // which they appear in alias sets (which is deterministic) and the |
388 | // iteration order within an equivalence class member is only dependent on |
389 | // the order in which unions and insertions are performed on the |
390 | // equivalence class, the iteration order is deterministic. |
391 | for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end(); |
392 | MI != ME; ++MI) { |
393 | auto PointerI = PositionMap.find(MI->getPointer()); |
394 | assert(PointerI != PositionMap.end() &&((PointerI != PositionMap.end() && "pointer in equivalence class not found in PositionMap" ) ? static_cast<void> (0) : __assert_fail ("PointerI != PositionMap.end() && \"pointer in equivalence class not found in PositionMap\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 395, __PRETTY_FUNCTION__)) |
395 | "pointer in equivalence class not found in PositionMap")((PointerI != PositionMap.end() && "pointer in equivalence class not found in PositionMap" ) ? static_cast<void> (0) : __assert_fail ("PointerI != PositionMap.end() && \"pointer in equivalence class not found in PositionMap\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 395, __PRETTY_FUNCTION__)); |
396 | unsigned Pointer = PointerI->second; |
397 | bool Merged = false; |
398 | // Mark this pointer as seen. |
399 | Seen.insert(Pointer); |
400 | |
401 | // Go through all the existing sets and see if we can find one |
402 | // which can include this pointer. |
403 | for (RuntimeCheckingPtrGroup &Group : Groups) { |
404 | // Don't perform more than a certain amount of comparisons. |
405 | // This should limit the cost of grouping the pointers to something |
406 | // reasonable. If we do end up hitting this threshold, the algorithm |
407 | // will create separate groups for all remaining pointers. |
408 | if (TotalComparisons > MemoryCheckMergeThreshold) |
409 | break; |
410 | |
411 | TotalComparisons++; |
412 | |
413 | if (Group.addPointer(Pointer)) { |
414 | Merged = true; |
415 | break; |
416 | } |
417 | } |
418 | |
419 | if (!Merged) |
420 | // We couldn't add this pointer to any existing set or the threshold |
421 | // for the number of comparisons has been reached. Create a new group |
422 | // to hold the current pointer. |
423 | Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this)); |
424 | } |
425 | |
426 | // We've computed the grouped checks for this partition. |
427 | // Save the results and continue with the next one. |
428 | llvm::copy(Groups, std::back_inserter(CheckingGroups)); |
429 | } |
430 | } |
431 | |
432 | bool RuntimePointerChecking::arePointersInSamePartition( |
433 | const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1, |
434 | unsigned PtrIdx2) { |
435 | return (PtrToPartition[PtrIdx1] != -1 && |
436 | PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); |
437 | } |
438 | |
439 | bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { |
440 | const PointerInfo &PointerI = Pointers[I]; |
441 | const PointerInfo &PointerJ = Pointers[J]; |
442 | |
443 | // No need to check if two readonly pointers intersect. |
444 | if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) |
445 | return false; |
446 | |
447 | // Only need to check pointers between two different dependency sets. |
448 | if (PointerI.DependencySetId == PointerJ.DependencySetId) |
449 | return false; |
450 | |
451 | // Only need to check pointers in the same alias set. |
452 | if (PointerI.AliasSetId != PointerJ.AliasSetId) |
453 | return false; |
454 | |
455 | return true; |
456 | } |
457 | |
458 | void RuntimePointerChecking::printChecks( |
459 | raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks, |
460 | unsigned Depth) const { |
461 | unsigned N = 0; |
462 | for (const auto &Check : Checks) { |
463 | const auto &First = Check.first->Members, &Second = Check.second->Members; |
464 | |
465 | OS.indent(Depth) << "Check " << N++ << ":\n"; |
466 | |
467 | OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; |
468 | for (unsigned K = 0; K < First.size(); ++K) |
469 | OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; |
470 | |
471 | OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; |
472 | for (unsigned K = 0; K < Second.size(); ++K) |
473 | OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n"; |
474 | } |
475 | } |
476 | |
477 | void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { |
478 | |
479 | OS.indent(Depth) << "Run-time memory checks:\n"; |
480 | printChecks(OS, Checks, Depth); |
481 | |
482 | OS.indent(Depth) << "Grouped accesses:\n"; |
483 | for (unsigned I = 0; I < CheckingGroups.size(); ++I) { |
484 | const auto &CG = CheckingGroups[I]; |
485 | |
486 | OS.indent(Depth + 2) << "Group " << &CG << ":\n"; |
487 | OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High |
488 | << ")\n"; |
489 | for (unsigned J = 0; J < CG.Members.size(); ++J) { |
490 | OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr |
491 | << "\n"; |
492 | } |
493 | } |
494 | } |
495 | |
496 | namespace { |
497 | |
498 | /// Analyses memory accesses in a loop. |
499 | /// |
500 | /// Checks whether run time pointer checks are needed and builds sets for data |
501 | /// dependence checking. |
502 | class AccessAnalysis { |
503 | public: |
504 | /// Read or write access location. |
505 | typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; |
506 | typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; |
507 | |
508 | AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, |
509 | MemoryDepChecker::DepCandidates &DA, |
510 | PredicatedScalarEvolution &PSE) |
511 | : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), |
512 | IsRTCheckAnalysisNeeded(false), PSE(PSE) {} |
513 | |
514 | /// Register a load and whether it is only read from. |
515 | void addLoad(MemoryLocation &Loc, bool IsReadOnly) { |
516 | Value *Ptr = const_cast<Value*>(Loc.Ptr); |
517 | AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); |
518 | Accesses.insert(MemAccessInfo(Ptr, false)); |
519 | if (IsReadOnly) |
520 | ReadOnlyPtr.insert(Ptr); |
521 | } |
522 | |
523 | /// Register a store. |
524 | void addStore(MemoryLocation &Loc) { |
525 | Value *Ptr = const_cast<Value*>(Loc.Ptr); |
526 | AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags); |
527 | Accesses.insert(MemAccessInfo(Ptr, true)); |
528 | } |
529 | |
530 | /// Check if we can emit a run-time no-alias check for \p Access. |
531 | /// |
532 | /// Returns true if we can emit a run-time no alias check for \p Access. |
533 | /// If we can check this access, this also adds it to a dependence set and |
534 | /// adds a run-time to check for it to \p RtCheck. If \p Assume is true, |
535 | /// we will attempt to use additional run-time checks in order to get |
536 | /// the bounds of the pointer. |
537 | bool createCheckForAccess(RuntimePointerChecking &RtCheck, |
538 | MemAccessInfo Access, |
539 | const ValueToValueMap &Strides, |
540 | DenseMap<Value *, unsigned> &DepSetId, |
541 | Loop *TheLoop, unsigned &RunningDepId, |
542 | unsigned ASId, bool ShouldCheckStride, |
543 | bool Assume); |
544 | |
545 | /// Check whether we can check the pointers at runtime for |
546 | /// non-intersection. |
547 | /// |
548 | /// Returns true if we need no check or if we do and we can generate them |
549 | /// (i.e. the pointers have computable bounds). |
550 | bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, |
551 | Loop *TheLoop, const ValueToValueMap &Strides, |
552 | bool ShouldCheckWrap = false); |
553 | |
554 | /// Goes over all memory accesses, checks whether a RT check is needed |
555 | /// and builds sets of dependent accesses. |
556 | void buildDependenceSets() { |
557 | processMemAccesses(); |
558 | } |
559 | |
560 | /// Initial processing of memory accesses determined that we need to |
561 | /// perform dependency checking. |
562 | /// |
563 | /// Note that this can later be cleared if we retry memcheck analysis without |
564 | /// dependency checking (i.e. FoundNonConstantDistanceDependence). |
565 | bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } |
566 | |
567 | /// We decided that no dependence analysis would be used. Reset the state. |
568 | void resetDepChecks(MemoryDepChecker &DepChecker) { |
569 | CheckDeps.clear(); |
570 | DepChecker.clearDependences(); |
571 | } |
572 | |
573 | MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } |
574 | |
575 | private: |
576 | typedef SetVector<MemAccessInfo> PtrAccessSet; |
577 | |
578 | /// Go over all memory access and check whether runtime pointer checks |
579 | /// are needed and build sets of dependency check candidates. |
580 | void processMemAccesses(); |
581 | |
582 | /// Set of all accesses. |
583 | PtrAccessSet Accesses; |
584 | |
585 | /// The loop being checked. |
586 | const Loop *TheLoop; |
587 | |
588 | /// List of accesses that need a further dependence check. |
589 | MemAccessInfoList CheckDeps; |
590 | |
591 | /// Set of pointers that are read only. |
592 | SmallPtrSet<Value*, 16> ReadOnlyPtr; |
593 | |
594 | /// An alias set tracker to partition the access set by underlying object and |
595 | //intrinsic property (such as TBAA metadata). |
596 | AliasSetTracker AST; |
597 | |
598 | LoopInfo *LI; |
599 | |
600 | /// Sets of potentially dependent accesses - members of one set share an |
601 | /// underlying pointer. The set "CheckDeps" identfies which sets really need a |
602 | /// dependence check. |
603 | MemoryDepChecker::DepCandidates &DepCands; |
604 | |
605 | /// Initial processing of memory accesses determined that we may need |
606 | /// to add memchecks. Perform the analysis to determine the necessary checks. |
607 | /// |
608 | /// Note that, this is different from isDependencyCheckNeeded. When we retry |
609 | /// memcheck analysis without dependency checking |
610 | /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is |
611 | /// cleared while this remains set if we have potentially dependent accesses. |
612 | bool IsRTCheckAnalysisNeeded; |
613 | |
614 | /// The SCEV predicate containing all the SCEV-related assumptions. |
615 | PredicatedScalarEvolution &PSE; |
616 | }; |
617 | |
618 | } // end anonymous namespace |
619 | |
620 | /// Check whether a pointer can participate in a runtime bounds check. |
621 | /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr |
622 | /// by adding run-time checks (overflow checks) if necessary. |
623 | static bool hasComputableBounds(PredicatedScalarEvolution &PSE, |
624 | const ValueToValueMap &Strides, Value *Ptr, |
625 | Loop *L, bool Assume) { |
626 | const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); |
627 | |
628 | // The bounds for loop-invariant pointer is trivial. |
629 | if (PSE.getSE()->isLoopInvariant(PtrScev, L)) |
630 | return true; |
631 | |
632 | const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); |
633 | |
634 | if (!AR && Assume) |
635 | AR = PSE.getAsAddRec(Ptr); |
636 | |
637 | if (!AR) |
638 | return false; |
639 | |
640 | return AR->isAffine(); |
641 | } |
642 | |
643 | /// Check whether a pointer address cannot wrap. |
644 | static bool isNoWrap(PredicatedScalarEvolution &PSE, |
645 | const ValueToValueMap &Strides, Value *Ptr, Loop *L) { |
646 | const SCEV *PtrScev = PSE.getSCEV(Ptr); |
647 | if (PSE.getSE()->isLoopInvariant(PtrScev, L)) |
648 | return true; |
649 | |
650 | int64_t Stride = getPtrStride(PSE, Ptr, L, Strides); |
651 | if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) |
652 | return true; |
653 | |
654 | return false; |
655 | } |
656 | |
657 | bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, |
658 | MemAccessInfo Access, |
659 | const ValueToValueMap &StridesMap, |
660 | DenseMap<Value *, unsigned> &DepSetId, |
661 | Loop *TheLoop, unsigned &RunningDepId, |
662 | unsigned ASId, bool ShouldCheckWrap, |
663 | bool Assume) { |
664 | Value *Ptr = Access.getPointer(); |
665 | |
666 | if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume)) |
667 | return false; |
668 | |
669 | // When we run after a failing dependency check we have to make sure |
670 | // we don't have wrapping pointers. |
671 | if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) { |
672 | auto *Expr = PSE.getSCEV(Ptr); |
673 | if (!Assume || !isa<SCEVAddRecExpr>(Expr)) |
674 | return false; |
675 | PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); |
676 | } |
677 | |
678 | // The id of the dependence set. |
679 | unsigned DepId; |
680 | |
681 | if (isDependencyCheckNeeded()) { |
682 | Value *Leader = DepCands.getLeaderValue(Access).getPointer(); |
683 | unsigned &LeaderId = DepSetId[Leader]; |
684 | if (!LeaderId) |
685 | LeaderId = RunningDepId++; |
686 | DepId = LeaderId; |
687 | } else |
688 | // Each access has its own dependence set. |
689 | DepId = RunningDepId++; |
690 | |
691 | bool IsWrite = Access.getInt(); |
692 | RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); |
693 | LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'; } } while (false); |
694 | |
695 | return true; |
696 | } |
697 | |
698 | bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, |
699 | ScalarEvolution *SE, Loop *TheLoop, |
700 | const ValueToValueMap &StridesMap, |
701 | bool ShouldCheckWrap) { |
702 | // Find pointers with computable bounds. We are going to use this information |
703 | // to place a runtime bound check. |
704 | bool CanDoRT = true; |
705 | |
706 | bool MayNeedRTCheck = false; |
707 | if (!IsRTCheckAnalysisNeeded) return true; |
708 | |
709 | bool IsDepCheckNeeded = isDependencyCheckNeeded(); |
710 | |
711 | // We assign a consecutive id to access from different alias sets. |
712 | // Accesses between different groups doesn't need to be checked. |
713 | unsigned ASId = 0; |
714 | for (auto &AS : AST) { |
715 | int NumReadPtrChecks = 0; |
716 | int NumWritePtrChecks = 0; |
717 | bool CanDoAliasSetRT = true; |
718 | ++ASId; |
719 | |
720 | // We assign consecutive id to access from different dependence sets. |
721 | // Accesses within the same set don't need a runtime check. |
722 | unsigned RunningDepId = 1; |
723 | DenseMap<Value *, unsigned> DepSetId; |
724 | |
725 | SmallVector<MemAccessInfo, 4> Retries; |
726 | |
727 | // First, count how many write and read accesses are in the alias set. Also |
728 | // collect MemAccessInfos for later. |
729 | SmallVector<MemAccessInfo, 4> AccessInfos; |
730 | for (const auto &A : AS) { |
731 | Value *Ptr = A.getValue(); |
732 | bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); |
733 | |
734 | if (IsWrite) |
735 | ++NumWritePtrChecks; |
736 | else |
737 | ++NumReadPtrChecks; |
738 | AccessInfos.emplace_back(Ptr, IsWrite); |
739 | } |
740 | |
741 | // We do not need runtime checks for this alias set, if there are no writes |
742 | // or a single write and no reads. |
743 | if (NumWritePtrChecks == 0 || |
744 | (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) { |
745 | assert((AS.size() <= 1 ||(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)) |
746 | all_of(AS,(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)) |
747 | [this](auto AC) {(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)) |
748 | MemAccessInfo AccessWrite(AC.getValue(), true);(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)) |
749 | return DepCands.findValue(AccessWrite) == DepCands.end();(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)) |
750 | })) &&(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)) |
751 | "Can only skip updating CanDoRT below, if all entries in AS "(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)) |
752 | "are reads or there is at most 1 entry")(((AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue( AccessWrite) == DepCands.end(); })) && "Can only skip updating CanDoRT below, if all entries in AS " "are reads or there is at most 1 entry") ? static_cast<void > (0) : __assert_fail ("(AS.size() <= 1 || all_of(AS, [this](auto AC) { MemAccessInfo AccessWrite(AC.getValue(), true); return DepCands.findValue(AccessWrite) == DepCands.end(); })) && \"Can only skip updating CanDoRT below, if all entries in AS \" \"are reads or there is at most 1 entry\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 752, __PRETTY_FUNCTION__)); |
753 | continue; |
754 | } |
755 | |
756 | for (auto &Access : AccessInfos) { |
757 | if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, |
758 | RunningDepId, ASId, ShouldCheckWrap, false)) { |
759 | LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Can't find bounds for ptr:" << *Access.getPointer() << '\n'; } } while (false ) |
760 | << *Access.getPointer() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Can't find bounds for ptr:" << *Access.getPointer() << '\n'; } } while (false ); |
761 | Retries.push_back(Access); |
762 | CanDoAliasSetRT = false; |
763 | } |
764 | } |
765 | |
766 | // Note that this function computes CanDoRT and MayNeedRTCheck |
767 | // independently. For example CanDoRT=false, MayNeedRTCheck=false means that |
768 | // we have a pointer for which we couldn't find the bounds but we don't |
769 | // actually need to emit any checks so it does not matter. |
770 | // |
771 | // We need runtime checks for this alias set, if there are at least 2 |
772 | // dependence sets (in which case RunningDepId > 2) or if we need to re-try |
773 | // any bound checks (because in that case the number of dependence sets is |
774 | // incomplete). |
775 | bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty(); |
776 | |
777 | // We need to perform run-time alias checks, but some pointers had bounds |
778 | // that couldn't be checked. |
779 | if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) { |
780 | // Reset the CanDoSetRt flag and retry all accesses that have failed. |
781 | // We know that we need these checks, so we can now be more aggressive |
782 | // and add further checks if required (overflow checks). |
783 | CanDoAliasSetRT = true; |
784 | for (auto Access : Retries) |
785 | if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, |
786 | TheLoop, RunningDepId, ASId, |
787 | ShouldCheckWrap, /*Assume=*/true)) { |
788 | CanDoAliasSetRT = false; |
789 | break; |
790 | } |
791 | } |
792 | |
793 | CanDoRT &= CanDoAliasSetRT; |
794 | MayNeedRTCheck |= NeedsAliasSetRTCheck; |
795 | ++ASId; |
796 | } |
797 | |
798 | // If the pointers that we would use for the bounds comparison have different |
799 | // address spaces, assume the values aren't directly comparable, so we can't |
800 | // use them for the runtime check. We also have to assume they could |
801 | // overlap. In the future there should be metadata for whether address spaces |
802 | // are disjoint. |
803 | unsigned NumPointers = RtCheck.Pointers.size(); |
804 | for (unsigned i = 0; i < NumPointers; ++i) { |
805 | for (unsigned j = i + 1; j < NumPointers; ++j) { |
806 | // Only need to check pointers between two different dependency sets. |
807 | if (RtCheck.Pointers[i].DependencySetId == |
808 | RtCheck.Pointers[j].DependencySetId) |
809 | continue; |
810 | // Only need to check pointers in the same alias set. |
811 | if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) |
812 | continue; |
813 | |
814 | Value *PtrI = RtCheck.Pointers[i].PointerValue; |
815 | Value *PtrJ = RtCheck.Pointers[j].PointerValue; |
816 | |
817 | unsigned ASi = PtrI->getType()->getPointerAddressSpace(); |
818 | unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); |
819 | if (ASi != ASj) { |
820 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Runtime check would require comparison between" " different address spaces\n"; } } while (false) |
821 | dbgs() << "LAA: Runtime check would require comparison between"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Runtime check would require comparison between" " different address spaces\n"; } } while (false) |
822 | " different address spaces\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Runtime check would require comparison between" " different address spaces\n"; } } while (false); |
823 | return false; |
824 | } |
825 | } |
826 | } |
827 | |
828 | if (MayNeedRTCheck && CanDoRT) |
829 | RtCheck.generateChecks(DepCands, IsDepCheckNeeded); |
830 | |
831 | LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() << " pointer comparisons.\n" ; } } while (false) |
832 | << " pointer comparisons.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() << " pointer comparisons.\n" ; } } while (false); |
833 | |
834 | // If we can do run-time checks, but there are no checks, no runtime checks |
835 | // are needed. This can happen when all pointers point to the same underlying |
836 | // object for example. |
837 | RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck; |
838 | |
839 | bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT; |
840 | if (!CanDoRTIfNeeded) |
841 | RtCheck.reset(); |
842 | return CanDoRTIfNeeded; |
843 | } |
844 | |
/// Partition the collected memory accesses into dependence-candidate
/// equivalence classes (DepCands) and record which accesses need a dependence
/// check (CheckDeps), setting IsRTCheckAnalysisNeeded when any do.
void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                                         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves used
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    // True once a write in this alias set has been processed; reads only need
    // dependence checks against earlier writes.
    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of access to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      // Second pass walks only the accesses deferred (as read-only) below.
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          // Seed the access's equivalence class; underlying-object sharing
          // merges classes via unionSets below.
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointer that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          getUnderlyingObjects(Ptr, TempObjects, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // nullptr never alias, don't join sets for pointer that have "null"
            // in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            // Union this access with the last access seen for the same
            // underlying object, so they land in one dependence-candidate
            // class.
            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}
961 | |
962 | static bool isInBoundsGep(Value *Ptr) { |
963 | if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) |
964 | return GEP->isInBounds(); |
965 | return false; |
966 | } |
967 | |
968 | /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping, |
969 | /// i.e. monotonically increasing/decreasing. |
970 | static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, |
971 | PredicatedScalarEvolution &PSE, const Loop *L) { |
972 | // FIXME: This should probably only return true for NUW. |
973 | if (AR->getNoWrapFlags(SCEV::NoWrapMask)) |
974 | return true; |
975 | |
976 | // Scalar evolution does not propagate the non-wrapping flags to values that |
977 | // are derived from a non-wrapping induction variable because non-wrapping |
978 | // could be flow-sensitive. |
979 | // |
980 | // Look through the potentially overflowing instruction to try to prove |
981 | // non-wrapping for the *specific* value of Ptr. |
982 | |
983 | // The arithmetic implied by an inbounds GEP can't overflow. |
984 | auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); |
985 | if (!GEP || !GEP->isInBounds()) |
986 | return false; |
987 | |
988 | // Make sure there is only one non-const index and analyze that. |
989 | Value *NonConstIndex = nullptr; |
990 | for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end())) |
991 | if (!isa<ConstantInt>(Index)) { |
992 | if (NonConstIndex) |
993 | return false; |
994 | NonConstIndex = Index; |
995 | } |
996 | if (!NonConstIndex) |
997 | // The recurrence is on the pointer, ignore for now. |
998 | return false; |
999 | |
1000 | // The index in GEP is signed. It is non-wrapping if it's derived from a NSW |
1001 | // AddRec using a NSW operation. |
1002 | if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex)) |
1003 | if (OBO->hasNoSignedWrap() && |
1004 | // Assume constant for other the operand so that the AddRec can be |
1005 | // easily found. |
1006 | isa<ConstantInt>(OBO->getOperand(1))) { |
1007 | auto *OpScev = PSE.getSCEV(OBO->getOperand(0)); |
1008 | |
1009 | if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev)) |
1010 | return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW); |
1011 | } |
1012 | |
1013 | return false; |
1014 | } |
1015 | |
/// Check whether the access through \p Ptr has a constant stride.
///
/// Returns the stride measured in units of the pointee type's alloc size
/// (i.e. StepVal / Size below), or 0 when no constant stride can be
/// established. When \p Assume is true, predicates may be registered with
/// \p PSE (via getAsAddRec / setNoOverflow) to make the access analyzable;
/// when \p ShouldCheckWrap is false, the no-wrap reasoning is skipped.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                           const Loop *Lp, const ValueToValueMap &StridesMap,
                           bool Assume, bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  auto *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
                      << *Ptr << "\n");
    return 0;
  }

  // Replace symbolic strides with their SCEV values before analyzing.
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    // Try harder: versioning predicates may turn PtrScev into an AddRec.
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is a AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // An getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  // Non-wrapping is established by (a) the caller disabling the check, (b) a
  // previously recorded NUSW predicate, or (c) direct analysis of the AddRec.
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(),
                           PtrTy->getAddressSpace())) {
    if (Assume) {
      // Side effect: records a NUSW predicate on PSE to justify proceeding.
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access. The step must be an exact multiple of the element size,
  // otherwise the access is not a constant number of elements apart.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              PtrTy->getAddressSpace()))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}
1126 | |
1127 | bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL, |
1128 | ScalarEvolution &SE, |
1129 | SmallVectorImpl<unsigned> &SortedIndices) { |
1130 | assert(llvm::all_of(((llvm::all_of( VL, [](const Value *V) { return V->getType ()->isPointerTy(); }) && "Expected list of pointer operands." ) ? static_cast<void> (0) : __assert_fail ("llvm::all_of( VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && \"Expected list of pointer operands.\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1132, __PRETTY_FUNCTION__)) |
1131 | VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&((llvm::all_of( VL, [](const Value *V) { return V->getType ()->isPointerTy(); }) && "Expected list of pointer operands." ) ? static_cast<void> (0) : __assert_fail ("llvm::all_of( VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && \"Expected list of pointer operands.\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1132, __PRETTY_FUNCTION__)) |
1132 | "Expected list of pointer operands.")((llvm::all_of( VL, [](const Value *V) { return V->getType ()->isPointerTy(); }) && "Expected list of pointer operands." ) ? static_cast<void> (0) : __assert_fail ("llvm::all_of( VL, [](const Value *V) { return V->getType()->isPointerTy(); }) && \"Expected list of pointer operands.\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1132, __PRETTY_FUNCTION__)); |
1133 | SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs; |
1134 | OffValPairs.reserve(VL.size()); |
1135 | |
1136 | // Walk over the pointers, and map each of them to an offset relative to |
1137 | // first pointer in the array. |
1138 | Value *Ptr0 = VL[0]; |
1139 | const SCEV *Scev0 = SE.getSCEV(Ptr0); |
1140 | Value *Obj0 = getUnderlyingObject(Ptr0); |
1141 | |
1142 | llvm::SmallSet<int64_t, 4> Offsets; |
1143 | for (auto *Ptr : VL) { |
1144 | // TODO: Outline this code as a special, more time consuming, version of |
1145 | // computeConstantDifference() function. |
1146 | if (Ptr->getType()->getPointerAddressSpace() != |
1147 | Ptr0->getType()->getPointerAddressSpace()) |
1148 | return false; |
1149 | // If a pointer refers to a different underlying object, bail - the |
1150 | // pointers are by definition incomparable. |
1151 | Value *CurrObj = getUnderlyingObject(Ptr); |
1152 | if (CurrObj != Obj0) |
1153 | return false; |
1154 | |
1155 | const SCEV *Scev = SE.getSCEV(Ptr); |
1156 | const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0)); |
1157 | // The pointers may not have a constant offset from each other, or SCEV |
1158 | // may just not be smart enough to figure out they do. Regardless, |
1159 | // there's nothing we can do. |
1160 | if (!Diff) |
1161 | return false; |
1162 | |
1163 | // Check if the pointer with the same offset is found. |
1164 | int64_t Offset = Diff->getAPInt().getSExtValue(); |
1165 | if (!Offsets.insert(Offset).second) |
1166 | return false; |
1167 | OffValPairs.emplace_back(Offset, Ptr); |
1168 | } |
1169 | SortedIndices.clear(); |
1170 | SortedIndices.resize(VL.size()); |
1171 | std::iota(SortedIndices.begin(), SortedIndices.end(), 0); |
1172 | |
1173 | // Sort the memory accesses and keep the order of their uses in UseOrder. |
1174 | llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) { |
1175 | return OffValPairs[Left].first < OffValPairs[Right].first; |
1176 | }); |
1177 | |
1178 | // Check if the order is consecutive already. |
1179 | if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) { |
1180 | return I == SortedIndices[I]; |
1181 | })) |
1182 | SortedIndices.clear(); |
1183 | |
1184 | return true; |
1185 | } |
1186 | |
1187 | /// Take the address space operand from the Load/Store instruction. |
1188 | /// Returns -1 if this is not a valid Load/Store instruction. |
1189 | static unsigned getAddressSpaceOperand(Value *I) { |
1190 | if (LoadInst *L = dyn_cast<LoadInst>(I)) |
1191 | return L->getPointerAddressSpace(); |
1192 | if (StoreInst *S = dyn_cast<StoreInst>(I)) |
1193 | return S->getPointerAddressSpace(); |
1194 | return -1; |
1195 | } |
1196 | |
1197 | /// Returns true if the memory operations \p A and \p B are consecutive. |
1198 | bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, |
1199 | ScalarEvolution &SE, bool CheckType) { |
1200 | Value *PtrA = getLoadStorePointerOperand(A); |
1201 | Value *PtrB = getLoadStorePointerOperand(B); |
1202 | unsigned ASA = getAddressSpaceOperand(A); |
1203 | unsigned ASB = getAddressSpaceOperand(B); |
1204 | |
1205 | // Check that the address spaces match and that the pointers are valid. |
1206 | if (!PtrA || !PtrB || (ASA != ASB)) |
1207 | return false; |
1208 | |
1209 | // Make sure that A and B are different pointers. |
1210 | if (PtrA == PtrB) |
1211 | return false; |
1212 | |
1213 | // Make sure that A and B have the same type if required. |
1214 | if (CheckType && PtrA->getType() != PtrB->getType()) |
1215 | return false; |
1216 | |
1217 | unsigned IdxWidth = DL.getIndexSizeInBits(ASA); |
1218 | Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); |
1219 | |
1220 | APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0); |
1221 | PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA); |
1222 | PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB); |
1223 | |
1224 | // Retrieve the address space again as pointer stripping now tracks through |
1225 | // `addrspacecast`. |
1226 | ASA = cast<PointerType>(PtrA->getType())->getAddressSpace(); |
1227 | ASB = cast<PointerType>(PtrB->getType())->getAddressSpace(); |
1228 | // Check that the address spaces match and that the pointers are valid. |
1229 | if (ASA != ASB) |
1230 | return false; |
1231 | |
1232 | IdxWidth = DL.getIndexSizeInBits(ASA); |
1233 | OffsetA = OffsetA.sextOrTrunc(IdxWidth); |
1234 | OffsetB = OffsetB.sextOrTrunc(IdxWidth); |
1235 | |
1236 | APInt Size(IdxWidth, DL.getTypeStoreSize(Ty)); |
1237 | |
1238 | // OffsetDelta = OffsetB - OffsetA; |
1239 | const SCEV *OffsetSCEVA = SE.getConstant(OffsetA); |
1240 | const SCEV *OffsetSCEVB = SE.getConstant(OffsetB); |
1241 | const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA); |
1242 | const APInt &OffsetDelta = cast<SCEVConstant>(OffsetDeltaSCEV)->getAPInt(); |
1243 | |
1244 | // Check if they are based on the same pointer. That makes the offsets |
1245 | // sufficient. |
1246 | if (PtrA == PtrB) |
1247 | return OffsetDelta == Size; |
1248 | |
1249 | // Compute the necessary base pointer delta to have the necessary final delta |
1250 | // equal to the size. |
1251 | // BaseDelta = Size - OffsetDelta; |
1252 | const SCEV *SizeSCEV = SE.getConstant(Size); |
1253 | const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV); |
1254 | |
1255 | // Otherwise compute the distance with SCEV between the base pointers. |
1256 | const SCEV *PtrSCEVA = SE.getSCEV(PtrA); |
1257 | const SCEV *PtrSCEVB = SE.getSCEV(PtrB); |
1258 | const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta); |
1259 | return X == PtrSCEVB; |
1260 | } |
1261 | |
1262 | MemoryDepChecker::VectorizationSafetyStatus |
1263 | MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) { |
1264 | switch (Type) { |
1265 | case NoDep: |
1266 | case Forward: |
1267 | case BackwardVectorizable: |
1268 | return VectorizationSafetyStatus::Safe; |
1269 | |
1270 | case Unknown: |
1271 | return VectorizationSafetyStatus::PossiblySafeWithRtChecks; |
1272 | case ForwardButPreventsForwarding: |
1273 | case Backward: |
1274 | case BackwardVectorizableButPreventsForwarding: |
1275 | return VectorizationSafetyStatus::Unsafe; |
1276 | } |
1277 | llvm_unreachable("unexpected DepType!")::llvm::llvm_unreachable_internal("unexpected DepType!", "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1277); |
1278 | } |
1279 | |
1280 | bool MemoryDepChecker::Dependence::isBackward() const { |
1281 | switch (Type) { |
1282 | case NoDep: |
1283 | case Forward: |
1284 | case ForwardButPreventsForwarding: |
1285 | case Unknown: |
1286 | return false; |
1287 | |
1288 | case BackwardVectorizable: |
1289 | case Backward: |
1290 | case BackwardVectorizableButPreventsForwarding: |
1291 | return true; |
1292 | } |
1293 | llvm_unreachable("unexpected DepType!")::llvm::llvm_unreachable_internal("unexpected DepType!", "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1293); |
1294 | } |
1295 | |
1296 | bool MemoryDepChecker::Dependence::isPossiblyBackward() const { |
1297 | return isBackward() || Type == Unknown; |
1298 | } |
1299 | |
1300 | bool MemoryDepChecker::Dependence::isForward() const { |
1301 | switch (Type) { |
1302 | case Forward: |
1303 | case ForwardButPreventsForwarding: |
1304 | return true; |
1305 | |
1306 | case NoDep: |
1307 | case Unknown: |
1308 | case BackwardVectorizable: |
1309 | case Backward: |
1310 | case BackwardVectorizableButPreventsForwarding: |
1311 | return false; |
1312 | } |
1313 | llvm_unreachable("unexpected DepType!")::llvm::llvm_unreachable_internal("unexpected DepType!", "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1313); |
1314 | } |
1315 | |
1316 | bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance, |
1317 | uint64_t TypeByteSize) { |
1318 | // If loads occur at a distance that is not a multiple of a feasible vector |
1319 | // factor store-load forwarding does not take place. |
1320 | // Positive dependences might cause troubles because vectorizing them might |
1321 | // prevent store-load forwarding making vectorized code run a lot slower. |
1322 | // a[i] = a[i-3] ^ a[i-8]; |
1323 | // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and |
1324 | // hence on your typical architecture store-load forwarding does not take |
1325 | // place. Vectorizing in such cases does not make sense. |
1326 | // Store-load forwarding distance. |
1327 | |
1328 | // After this many iterations store-to-load forwarding conflicts should not |
1329 | // cause any slowdowns. |
1330 | const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize; |
1331 | // Maximum vector factor. |
1332 | uint64_t MaxVFWithoutSLForwardIssues = std::min( |
1333 | VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes); |
1334 | |
1335 | // Compute the smallest VF at which the store and load would be misaligned. |
1336 | for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues; |
1337 | VF *= 2) { |
1338 | // If the number of vector iteration between the store and the load are |
1339 | // small we could incur conflicts. |
1340 | if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) { |
1341 | MaxVFWithoutSLForwardIssues = (VF >>= 1); |
Although the value stored to 'VF' is used in the enclosing expression, the value is never actually read from 'VF' | |
1342 | break; |
1343 | } |
1344 | } |
1345 | |
1346 | if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) { |
1347 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n" ; } } while (false) |
1348 | dbgs() << "LAA: Distance " << Distancedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n" ; } } while (false) |
1349 | << " that could cause a store-load forwarding conflict\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Distance " << Distance << " that could cause a store-load forwarding conflict\n" ; } } while (false); |
1350 | return true; |
1351 | } |
1352 | |
1353 | if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes && |
1354 | MaxVFWithoutSLForwardIssues != |
1355 | VectorizerParams::MaxVectorWidth * TypeByteSize) |
1356 | MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues; |
1357 | return false; |
1358 | } |
1359 | |
1360 | void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) { |
1361 | if (Status < S) |
1362 | Status = S; |
1363 | } |
1364 | |
1365 | /// Given a non-constant (unknown) dependence-distance \p Dist between two |
1366 | /// memory accesses, that have the same stride whose absolute value is given |
1367 | /// in \p Stride, and that have the same type size \p TypeByteSize, |
1368 | /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is |
1369 | /// possible to prove statically that the dependence distance is larger |
1370 | /// than the range that the accesses will travel through the execution of |
1371 | /// the loop. If so, return true; false otherwise. This is useful for |
1372 | /// example in loops such as the following (PR31098): |
1373 | /// for (i = 0; i < D; ++i) { |
1374 | /// = out[i]; |
1375 | /// out[i+D] = |
1376 | /// } |
1377 | static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, |
1378 | const SCEV &BackedgeTakenCount, |
1379 | const SCEV &Dist, uint64_t Stride, |
1380 | uint64_t TypeByteSize) { |
1381 | |
1382 | // If we can prove that |
1383 | // (**) |Dist| > BackedgeTakenCount * Step |
1384 | // where Step is the absolute stride of the memory accesses in bytes, |
1385 | // then there is no dependence. |
1386 | // |
1387 | // Rationale: |
1388 | // We basically want to check if the absolute distance (|Dist/Step|) |
1389 | // is >= the loop iteration count (or > BackedgeTakenCount). |
1390 | // This is equivalent to the Strong SIV Test (Practical Dependence Testing, |
1391 | // Section 4.2.1); Note, that for vectorization it is sufficient to prove |
1392 | // that the dependence distance is >= VF; This is checked elsewhere. |
1393 | // But in some cases we can prune unknown dependence distances early, and |
1394 | // even before selecting the VF, and without a runtime test, by comparing |
1395 | // the distance against the loop iteration count. Since the vectorized code |
1396 | // will be executed only if LoopCount >= VF, proving distance >= LoopCount |
1397 | // also guarantees that distance >= VF. |
1398 | // |
1399 | const uint64_t ByteStride = Stride * TypeByteSize; |
1400 | const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride); |
1401 | const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step); |
1402 | |
1403 | const SCEV *CastedDist = &Dist; |
1404 | const SCEV *CastedProduct = Product; |
1405 | uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType()); |
1406 | uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType()); |
1407 | |
1408 | // The dependence distance can be positive/negative, so we sign extend Dist; |
1409 | // The multiplication of the absolute stride in bytes and the |
1410 | // backedgeTakenCount is non-negative, so we zero extend Product. |
1411 | if (DistTypeSize > ProductTypeSize) |
1412 | CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType()); |
1413 | else |
1414 | CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType()); |
1415 | |
1416 | // Is Dist - (BackedgeTakenCount * Step) > 0 ? |
1417 | // (If so, then we have proven (**) because |Dist| >= Dist) |
1418 | const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct); |
1419 | if (SE.isKnownPositive(Minus)) |
1420 | return true; |
1421 | |
1422 | // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ? |
1423 | // (If so, then we have proven (**) because |Dist| >= -1*Dist) |
1424 | const SCEV *NegDist = SE.getNegativeSCEV(CastedDist); |
1425 | Minus = SE.getMinusSCEV(NegDist, CastedProduct); |
1426 | if (SE.isKnownPositive(Minus)) |
1427 | return true; |
1428 | |
1429 | return false; |
1430 | } |
1431 | |
/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // A distance that is not a whole number of elements cannot be proven
  // independent by this test.
  if (Distance % TypeByteSize != 0)
    return false;

  // Distance measured in elements.
  uint64_t ScaledDist = Distance / TypeByteSize;

  // The accesses are independent iff the element distance never lands on a
  // multiple of the stride.
  //
  // E.g.
  //   for (i = 0; i < 1024 ; i += 4)
  //     A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //   | A[0] |      |      |      | A[4] |      |      |      |
  //   |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //   for (i = 0; i < 1024 ; i += 3)
  //     A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //   | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //   |      |      |      |      | A[4] |      |      | A[7] |      |
  return (ScaledDist % Stride) != 0;
}
1467 | |
1468 | MemoryDepChecker::Dependence::DepType |
1469 | MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, |
1470 | const MemAccessInfo &B, unsigned BIdx, |
1471 | const ValueToValueMap &Strides) { |
1472 | assert (AIdx < BIdx && "Must pass arguments in program order")((AIdx < BIdx && "Must pass arguments in program order" ) ? static_cast<void> (0) : __assert_fail ("AIdx < BIdx && \"Must pass arguments in program order\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1472, __PRETTY_FUNCTION__)); |
1473 | |
1474 | Value *APtr = A.getPointer(); |
1475 | Value *BPtr = B.getPointer(); |
1476 | bool AIsWrite = A.getInt(); |
1477 | bool BIsWrite = B.getInt(); |
1478 | |
1479 | // Two reads are independent. |
1480 | if (!AIsWrite && !BIsWrite) |
1481 | return Dependence::NoDep; |
1482 | |
1483 | // We cannot check pointers in different address spaces. |
1484 | if (APtr->getType()->getPointerAddressSpace() != |
1485 | BPtr->getType()->getPointerAddressSpace()) |
1486 | return Dependence::Unknown; |
1487 | |
1488 | int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true); |
1489 | int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true); |
1490 | |
1491 | const SCEV *Src = PSE.getSCEV(APtr); |
1492 | const SCEV *Sink = PSE.getSCEV(BPtr); |
1493 | |
1494 | // If the induction step is negative we have to invert source and sink of the |
1495 | // dependence. |
1496 | if (StrideAPtr < 0) { |
1497 | std::swap(APtr, BPtr); |
1498 | std::swap(Src, Sink); |
1499 | std::swap(AIsWrite, BIsWrite); |
1500 | std::swap(AIdx, BIdx); |
1501 | std::swap(StrideAPtr, StrideBPtr); |
1502 | } |
1503 | |
1504 | const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); |
1505 | |
1506 | LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sinkdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink << "(Induction step: " << StrideAPtr << ")\n"; } } while (false) |
1507 | << "(Induction step: " << StrideAPtr << ")\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink << "(Induction step: " << StrideAPtr << ")\n"; } } while (false); |
1508 | LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"; } } while (false) |
1509 | << *InstMap[BIdx] << ": " << *Dist << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " << *InstMap[BIdx] << ": " << *Dist << "\n"; } } while (false); |
1510 | |
1511 | // Need accesses with constant stride. We don't want to vectorize |
1512 | // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in |
1513 | // the address space. |
1514 | if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){ |
1515 | LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "Pointer access with non-constant stride\n" ; } } while (false); |
1516 | return Dependence::Unknown; |
1517 | } |
1518 | |
1519 | Type *ATy = APtr->getType()->getPointerElementType(); |
1520 | Type *BTy = BPtr->getType()->getPointerElementType(); |
1521 | auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); |
1522 | uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); |
1523 | uint64_t Stride = std::abs(StrideAPtr); |
1524 | const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist); |
1525 | if (!C) { |
1526 | if (TypeByteSize == DL.getTypeAllocSize(BTy) && |
1527 | isSafeDependenceDistance(DL, *(PSE.getSE()), |
1528 | *(PSE.getBackedgeTakenCount()), *Dist, Stride, |
1529 | TypeByteSize)) |
1530 | return Dependence::NoDep; |
1531 | |
1532 | LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Dependence because of non-constant distance\n" ; } } while (false); |
1533 | FoundNonConstantDistanceDependence = true; |
1534 | return Dependence::Unknown; |
1535 | } |
1536 | |
1537 | const APInt &Val = C->getAPInt(); |
1538 | int64_t Distance = Val.getSExtValue(); |
1539 | |
1540 | // Attempt to prove strided accesses independent. |
1541 | if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && |
1542 | areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { |
1543 | LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Strided accesses are independent\n" ; } } while (false); |
1544 | return Dependence::NoDep; |
1545 | } |
1546 | |
1547 | // Negative distances are not plausible dependencies. |
1548 | if (Val.isNegative()) { |
1549 | bool IsTrueDataDependence = (AIsWrite && !BIsWrite); |
1550 | if (IsTrueDataDependence && EnableForwardingConflictDetection && |
1551 | (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) || |
1552 | ATy != BTy)) { |
1553 | LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Forward but may prevent st->ld forwarding\n" ; } } while (false); |
1554 | return Dependence::ForwardButPreventsForwarding; |
1555 | } |
1556 | |
1557 | LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Dependence is negative\n" ; } } while (false); |
1558 | return Dependence::Forward; |
1559 | } |
1560 | |
1561 | // Write to the same location with the same size. |
1562 | // Could be improved to assert type sizes are the same (i32 == float, etc). |
1563 | if (Val == 0) { |
1564 | if (ATy == BTy) |
1565 | return Dependence::Forward; |
1566 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Zero dependence difference but different types\n" ; } } while (false) |
1567 | dbgs() << "LAA: Zero dependence difference but different types\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Zero dependence difference but different types\n" ; } } while (false); |
1568 | return Dependence::Unknown; |
1569 | } |
1570 | |
1571 | assert(Val.isStrictlyPositive() && "Expect a positive value")((Val.isStrictlyPositive() && "Expect a positive value" ) ? static_cast<void> (0) : __assert_fail ("Val.isStrictlyPositive() && \"Expect a positive value\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1571, __PRETTY_FUNCTION__)); |
1572 | |
1573 | if (ATy != BTy) { |
1574 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: ReadWrite-Write positive dependency with different types\n" ; } } while (false) |
1575 | dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: ReadWrite-Write positive dependency with different types\n" ; } } while (false) |
1576 | << "LAA: ReadWrite-Write positive dependency with different types\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: ReadWrite-Write positive dependency with different types\n" ; } } while (false); |
1577 | return Dependence::Unknown; |
1578 | } |
1579 | |
1580 | // Bail out early if passed-in parameters make vectorization not feasible. |
1581 | unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ? |
1582 | VectorizerParams::VectorizationFactor : 1); |
1583 | unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ? |
1584 | VectorizerParams::VectorizationInterleave : 1); |
1585 | // The minimum number of iterations for a vectorized/unrolled version. |
1586 | unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U); |
1587 | |
1588 | // It's not vectorizable if the distance is smaller than the minimum distance |
1589 | // needed for a vectroized/unrolled version. Vectorizing one iteration in |
1590 | // front needs TypeByteSize * Stride. Vectorizing the last iteration needs |
1591 | // TypeByteSize (No need to plus the last gap distance). |
1592 | // |
1593 | // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. |
1594 | // foo(int *A) { |
1595 | // int *B = (int *)((char *)A + 14); |
1596 | // for (i = 0 ; i < 1024 ; i += 2) |
1597 | // B[i] = A[i] + 1; |
1598 | // } |
1599 | // |
1600 | // Two accesses in memory (stride is 2): |
1601 | // | A[0] | | A[2] | | A[4] | | A[6] | | |
1602 | // | B[0] | | B[2] | | B[4] | |
1603 | // |
1604 | // Distance needs for vectorizing iterations except the last iteration: |
1605 | // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4. |
1606 | // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4. |
1607 | // |
1608 | // If MinNumIter is 2, it is vectorizable as the minimum distance needed is |
1609 | // 12, which is less than distance. |
1610 | // |
1611 | // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4), |
1612 | // the minimum distance needed is 28, which is greater than distance. It is |
1613 | // not safe to do vectorization. |
1614 | uint64_t MinDistanceNeeded = |
1615 | TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize; |
1616 | if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) { |
1617 | LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'; } } while (false) |
1618 | << Distance << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Failure because of positive distance " << Distance << '\n'; } } while (false); |
1619 | return Dependence::Backward; |
1620 | } |
1621 | |
1622 | // Unsafe if the minimum distance needed is greater than max safe distance. |
1623 | if (MinDistanceNeeded > MaxSafeDepDistBytes) { |
1624 | LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Failure because it needs at least " << MinDistanceNeeded << " size in bytes"; } } while (false) |
1625 | << MinDistanceNeeded << " size in bytes")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Failure because it needs at least " << MinDistanceNeeded << " size in bytes"; } } while (false); |
1626 | return Dependence::Backward; |
1627 | } |
1628 | |
1629 | // Positive distance bigger than max vectorization factor. |
1630 | // FIXME: Should use max factor instead of max distance in bytes, which could |
1631 | // not handle different types. |
1632 | // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. |
1633 | // void foo (int *A, char *B) { |
1634 | // for (unsigned i = 0; i < 1024; i++) { |
1635 | // A[i+2] = A[i] + 1; |
1636 | // B[i+2] = B[i] + 1; |
1637 | // } |
1638 | // } |
1639 | // |
1640 | // This case is currently unsafe according to the max safe distance. If we |
1641 | // analyze the two accesses on array B, the max safe dependence distance |
1642 | // is 2. Then we analyze the accesses on array A, the minimum distance needed |
1643 | // is 8, which is less than 2 and forbidden vectorization, But actually |
1644 | // both A and B could be vectorized by 2 iterations. |
1645 | MaxSafeDepDistBytes = |
1646 | std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes); |
1647 | |
1648 | bool IsTrueDataDependence = (!AIsWrite && BIsWrite); |
1649 | if (IsTrueDataDependence && EnableForwardingConflictDetection && |
1650 | couldPreventStoreLoadForward(Distance, TypeByteSize)) |
1651 | return Dependence::BackwardVectorizableButPreventsForwarding; |
1652 | |
1653 | uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride); |
1654 | LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'; } } while (false) |
1655 | << " with max VF = " << MaxVF << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Positive distance " << Val.getSExtValue() << " with max VF = " << MaxVF << '\n'; } } while (false); |
1656 | uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; |
1657 | MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits); |
1658 | return Dependence::BackwardVectorizable; |
1659 | } |
1660 | |
1661 | bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, |
1662 | MemAccessInfoList &CheckDeps, |
1663 | const ValueToValueMap &Strides) { |
1664 | |
1665 | MaxSafeDepDistBytes = -1; |
1666 | SmallPtrSet<MemAccessInfo, 8> Visited; |
1667 | for (MemAccessInfo CurAccess : CheckDeps) { |
1668 | if (Visited.count(CurAccess)) |
1669 | continue; |
1670 | |
1671 | // Get the relevant memory access set. |
1672 | EquivalenceClasses<MemAccessInfo>::iterator I = |
1673 | AccessSets.findValue(AccessSets.getLeaderValue(CurAccess)); |
1674 | |
1675 | // Check accesses within this set. |
1676 | EquivalenceClasses<MemAccessInfo>::member_iterator AI = |
1677 | AccessSets.member_begin(I); |
1678 | EquivalenceClasses<MemAccessInfo>::member_iterator AE = |
1679 | AccessSets.member_end(); |
1680 | |
1681 | // Check every access pair. |
1682 | while (AI != AE) { |
1683 | Visited.insert(*AI); |
1684 | bool AIIsWrite = AI->getInt(); |
1685 | // Check loads only against next equivalent class, but stores also against |
1686 | // other stores in the same equivalence class - to the same address. |
1687 | EquivalenceClasses<MemAccessInfo>::member_iterator OI = |
1688 | (AIIsWrite ? AI : std::next(AI)); |
1689 | while (OI != AE) { |
1690 | // Check every accessing instruction pair in program order. |
1691 | for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(), |
1692 | I1E = Accesses[*AI].end(); I1 != I1E; ++I1) |
1693 | // Scan all accesses of another equivalence class, but only the next |
1694 | // accesses of the same equivalent class. |
1695 | for (std::vector<unsigned>::iterator |
1696 | I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()), |
1697 | I2E = (OI == AI ? I1E : Accesses[*OI].end()); |
1698 | I2 != I2E; ++I2) { |
1699 | auto A = std::make_pair(&*AI, *I1); |
1700 | auto B = std::make_pair(&*OI, *I2); |
1701 | |
1702 | assert(*I1 != *I2)((*I1 != *I2) ? static_cast<void> (0) : __assert_fail ( "*I1 != *I2", "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 1702, __PRETTY_FUNCTION__)); |
1703 | if (*I1 > *I2) |
1704 | std::swap(A, B); |
1705 | |
1706 | Dependence::DepType Type = |
1707 | isDependent(*A.first, A.second, *B.first, B.second, Strides); |
1708 | mergeInStatus(Dependence::isSafeForVectorization(Type)); |
1709 | |
1710 | // Gather dependences unless we accumulated MaxDependences |
1711 | // dependences. In that case return as soon as we find the first |
1712 | // unsafe dependence. This puts a limit on this quadratic |
1713 | // algorithm. |
1714 | if (RecordDependences) { |
1715 | if (Type != Dependence::NoDep) |
1716 | Dependences.push_back(Dependence(A.second, B.second, Type)); |
1717 | |
1718 | if (Dependences.size() >= MaxDependences) { |
1719 | RecordDependences = false; |
1720 | Dependences.clear(); |
1721 | LLVM_DEBUG(dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "Too many dependences, stopped recording\n" ; } } while (false) |
1722 | << "Too many dependences, stopped recording\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "Too many dependences, stopped recording\n" ; } } while (false); |
1723 | } |
1724 | } |
1725 | if (!RecordDependences && !isSafeForVectorization()) |
1726 | return false; |
1727 | } |
1728 | ++OI; |
1729 | } |
1730 | AI++; |
1731 | } |
1732 | } |
1733 | |
1734 | LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "Total Dependences: " << Dependences.size() << "\n"; } } while (false); |
1735 | return isSafeForVectorization(); |
1736 | } |
1737 | |
1738 | SmallVector<Instruction *, 4> |
1739 | MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const { |
1740 | MemAccessInfo Access(Ptr, isWrite); |
1741 | auto &IndexVector = Accesses.find(Access)->second; |
1742 | |
1743 | SmallVector<Instruction *, 4> Insts; |
1744 | transform(IndexVector, |
1745 | std::back_inserter(Insts), |
1746 | [&](unsigned Idx) { return this->InstMap[Idx]; }); |
1747 | return Insts; |
1748 | } |
1749 | |
// Printable names for Dependence::DepType values. Indexed directly by the
// DepType enumerator (see Dependence::print), so the order here must match
// the enum declaration order.
const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1753 | |
1754 | void MemoryDepChecker::Dependence::print( |
1755 | raw_ostream &OS, unsigned Depth, |
1756 | const SmallVectorImpl<Instruction *> &Instrs) const { |
1757 | OS.indent(Depth) << DepName[Type] << ":\n"; |
1758 | OS.indent(Depth + 2) << *Instrs[Source] << " -> \n"; |
1759 | OS.indent(Depth + 2) << *Instrs[Destination] << "\n"; |
1760 | } |
1761 | |
1762 | bool LoopAccessInfo::canAnalyzeLoop() { |
1763 | // We need to have a loop header. |
1764 | LLVM_DEBUG(dbgs() << "LAA: Found a loop in "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a loop in " << TheLoop->getHeader()->getParent()->getName() << ": " << TheLoop->getHeader()->getName() << '\n'; } } while (false) |
1765 | << TheLoop->getHeader()->getParent()->getName() << ": "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a loop in " << TheLoop->getHeader()->getParent()->getName() << ": " << TheLoop->getHeader()->getName() << '\n'; } } while (false) |
1766 | << TheLoop->getHeader()->getName() << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a loop in " << TheLoop->getHeader()->getParent()->getName() << ": " << TheLoop->getHeader()->getName() << '\n'; } } while (false); |
1767 | |
1768 | // We can only analyze innermost loops. |
1769 | if (!TheLoop->isInnermost()) { |
1770 | LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: loop is not the innermost loop\n" ; } } while (false); |
1771 | recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop"; |
1772 | return false; |
1773 | } |
1774 | |
1775 | // We must have a single backedge. |
1776 | if (TheLoop->getNumBackEdges() != 1) { |
1777 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: loop control flow is not understood by analyzer\n" ; } } while (false) |
1778 | dbgs() << "LAA: loop control flow is not understood by analyzer\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: loop control flow is not understood by analyzer\n" ; } } while (false); |
1779 | recordAnalysis("CFGNotUnderstood") |
1780 | << "loop control flow is not understood by analyzer"; |
1781 | return false; |
1782 | } |
1783 | |
1784 | // ScalarEvolution needs to be able to find the exit count. |
1785 | const SCEV *ExitCount = PSE->getBackedgeTakenCount(); |
1786 | if (isa<SCEVCouldNotCompute>(ExitCount)) { |
1787 | recordAnalysis("CantComputeNumberOfIterations") |
1788 | << "could not determine number of loop iterations"; |
1789 | LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: SCEV could not compute the loop exit count.\n" ; } } while (false); |
1790 | return false; |
1791 | } |
1792 | |
1793 | return true; |
1794 | } |
1795 | |
1796 | void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, |
1797 | const TargetLibraryInfo *TLI, |
1798 | DominatorTree *DT) { |
1799 | typedef SmallPtrSet<Value*, 16> ValueSet; |
1800 | |
1801 | // Holds the Load and Store instructions. |
1802 | SmallVector<LoadInst *, 16> Loads; |
1803 | SmallVector<StoreInst *, 16> Stores; |
1804 | |
1805 | // Holds all the different accesses in the loop. |
1806 | unsigned NumReads = 0; |
1807 | unsigned NumReadWrites = 0; |
1808 | |
1809 | bool HasComplexMemInst = false; |
1810 | |
1811 | // A runtime check is only legal to insert if there are no convergent calls. |
1812 | HasConvergentOp = false; |
1813 | |
1814 | PtrRtChecking->Pointers.clear(); |
1815 | PtrRtChecking->Need = false; |
1816 | |
1817 | const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); |
1818 | |
1819 | const bool EnableMemAccessVersioningOfLoop = |
1820 | EnableMemAccessVersioning && |
1821 | !TheLoop->getHeader()->getParent()->hasOptSize(); |
1822 | |
1823 | // For each block. |
1824 | for (BasicBlock *BB : TheLoop->blocks()) { |
1825 | // Scan the BB and collect legal loads and stores. Also detect any |
1826 | // convergent instructions. |
1827 | for (Instruction &I : *BB) { |
1828 | if (auto *Call = dyn_cast<CallBase>(&I)) { |
1829 | if (Call->isConvergent()) |
1830 | HasConvergentOp = true; |
1831 | } |
1832 | |
1833 | // With both a non-vectorizable memory instruction and a convergent |
1834 | // operation, found in this loop, no reason to continue the search. |
1835 | if (HasComplexMemInst && HasConvergentOp) { |
1836 | CanVecMem = false; |
1837 | return; |
1838 | } |
1839 | |
1840 | // Avoid hitting recordAnalysis multiple times. |
1841 | if (HasComplexMemInst) |
1842 | continue; |
1843 | |
1844 | // If this is a load, save it. If this instruction can read from memory |
1845 | // but is not a load, then we quit. Notice that we don't handle function |
1846 | // calls that read or write. |
1847 | if (I.mayReadFromMemory()) { |
1848 | // Many math library functions read the rounding mode. We will only |
1849 | // vectorize a loop if it contains known function calls that don't set |
1850 | // the flag. Therefore, it is safe to ignore this read from memory. |
1851 | auto *Call = dyn_cast<CallInst>(&I); |
1852 | if (Call && getVectorIntrinsicIDForCall(Call, TLI)) |
1853 | continue; |
1854 | |
1855 | // If the function has an explicit vectorized counterpart, we can safely |
1856 | // assume that it can be vectorized. |
1857 | if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && |
1858 | !VFDatabase::getMappings(*Call).empty()) |
1859 | continue; |
1860 | |
1861 | auto *Ld = dyn_cast<LoadInst>(&I); |
1862 | if (!Ld) { |
1863 | recordAnalysis("CantVectorizeInstruction", Ld) |
1864 | << "instruction cannot be vectorized"; |
1865 | HasComplexMemInst = true; |
1866 | continue; |
1867 | } |
1868 | if (!Ld->isSimple() && !IsAnnotatedParallel) { |
1869 | recordAnalysis("NonSimpleLoad", Ld) |
1870 | << "read with atomic ordering or volatile read"; |
1871 | LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a non-simple load.\n" ; } } while (false); |
1872 | HasComplexMemInst = true; |
1873 | continue; |
1874 | } |
1875 | NumLoads++; |
1876 | Loads.push_back(Ld); |
1877 | DepChecker->addAccess(Ld); |
1878 | if (EnableMemAccessVersioningOfLoop) |
1879 | collectStridedAccess(Ld); |
1880 | continue; |
1881 | } |
1882 | |
1883 | // Save 'store' instructions. Abort if other instructions write to memory. |
1884 | if (I.mayWriteToMemory()) { |
1885 | auto *St = dyn_cast<StoreInst>(&I); |
1886 | if (!St) { |
1887 | recordAnalysis("CantVectorizeInstruction", St) |
1888 | << "instruction cannot be vectorized"; |
1889 | HasComplexMemInst = true; |
1890 | continue; |
1891 | } |
1892 | if (!St->isSimple() && !IsAnnotatedParallel) { |
1893 | recordAnalysis("NonSimpleStore", St) |
1894 | << "write with atomic ordering or volatile write"; |
1895 | LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a non-simple store.\n" ; } } while (false); |
1896 | HasComplexMemInst = true; |
1897 | continue; |
1898 | } |
1899 | NumStores++; |
1900 | Stores.push_back(St); |
1901 | DepChecker->addAccess(St); |
1902 | if (EnableMemAccessVersioningOfLoop) |
1903 | collectStridedAccess(St); |
1904 | } |
1905 | } // Next instr. |
1906 | } // Next block. |
1907 | |
1908 | if (HasComplexMemInst) { |
1909 | CanVecMem = false; |
1910 | return; |
1911 | } |
1912 | |
1913 | // Now we have two lists that hold the loads and the stores. |
1914 | // Next, we find the pointers that they use. |
1915 | |
1916 | // Check if we see any stores. If there are no stores, then we don't |
1917 | // care if the pointers are *restrict*. |
1918 | if (!Stores.size()) { |
1919 | LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a read-only loop!\n" ; } } while (false); |
1920 | CanVecMem = true; |
1921 | return; |
1922 | } |
1923 | |
1924 | MemoryDepChecker::DepCandidates DependentAccesses; |
1925 | AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); |
1926 | |
1927 | // Holds the analyzed pointers. We don't want to call getUnderlyingObjects |
1928 | // multiple times on the same object. If the ptr is accessed twice, once |
1929 | // for read and once for write, it will only appear once (on the write |
1930 | // list). This is okay, since we are going to check for conflicts between |
1931 | // writes and between reads and writes, but not between reads and reads. |
1932 | ValueSet Seen; |
1933 | |
1934 | // Record uniform store addresses to identify if we have multiple stores |
1935 | // to the same address. |
1936 | ValueSet UniformStores; |
1937 | |
1938 | for (StoreInst *ST : Stores) { |
1939 | Value *Ptr = ST->getPointerOperand(); |
1940 | |
1941 | if (isUniform(Ptr)) |
1942 | HasDependenceInvolvingLoopInvariantAddress |= |
1943 | !UniformStores.insert(Ptr).second; |
1944 | |
1945 | // If we did *not* see this pointer before, insert it to the read-write |
1946 | // list. At this phase it is only a 'write' list. |
1947 | if (Seen.insert(Ptr).second) { |
1948 | ++NumReadWrites; |
1949 | |
1950 | MemoryLocation Loc = MemoryLocation::get(ST); |
1951 | // The TBAA metadata could have a control dependency on the predication |
1952 | // condition, so we cannot rely on it when determining whether or not we |
1953 | // need runtime pointer checks. |
1954 | if (blockNeedsPredication(ST->getParent(), TheLoop, DT)) |
1955 | Loc.AATags.TBAA = nullptr; |
1956 | |
1957 | Accesses.addStore(Loc); |
1958 | } |
1959 | } |
1960 | |
1961 | if (IsAnnotatedParallel) { |
1962 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " << "checks.\n"; } } while (false) |
1963 | dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " << "checks.\n"; } } while (false) |
1964 | << "checks.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " << "checks.\n"; } } while (false); |
1965 | CanVecMem = true; |
1966 | return; |
1967 | } |
1968 | |
1969 | for (LoadInst *LD : Loads) { |
1970 | Value *Ptr = LD->getPointerOperand(); |
1971 | // If we did *not* see this pointer before, insert it to the |
1972 | // read list. If we *did* see it before, then it is already in |
1973 | // the read-write list. This allows us to vectorize expressions |
1974 | // such as A[i] += x; Because the address of A[i] is a read-write |
1975 | // pointer. This only works if the index of A[i] is consecutive. |
1976 | // If the address of i is unknown (for example A[B[i]]) then we may |
1977 | // read a few words, modify, and write a few words, and some of the |
1978 | // words may be written to the same address. |
1979 | bool IsReadOnlyPtr = false; |
1980 | if (Seen.insert(Ptr).second || |
1981 | !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) { |
1982 | ++NumReads; |
1983 | IsReadOnlyPtr = true; |
1984 | } |
1985 | |
1986 | // See if there is an unsafe dependency between a load to a uniform address and |
1987 | // store to the same uniform address. |
1988 | if (UniformStores.count(Ptr)) { |
1989 | LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found an unsafe dependency between a uniform " "load and uniform store to the same address!\n"; } } while ( false) |
1990 | "load and uniform store to the same address!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found an unsafe dependency between a uniform " "load and uniform store to the same address!\n"; } } while ( false); |
1991 | HasDependenceInvolvingLoopInvariantAddress = true; |
1992 | } |
1993 | |
1994 | MemoryLocation Loc = MemoryLocation::get(LD); |
1995 | // The TBAA metadata could have a control dependency on the predication |
1996 | // condition, so we cannot rely on it when determining whether or not we |
1997 | // need runtime pointer checks. |
1998 | if (blockNeedsPredication(LD->getParent(), TheLoop, DT)) |
1999 | Loc.AATags.TBAA = nullptr; |
2000 | |
2001 | Accesses.addLoad(Loc, IsReadOnlyPtr); |
2002 | } |
2003 | |
2004 | // If we write (or read-write) to a single destination and there are no |
2005 | // other reads in this loop then is it safe to vectorize. |
2006 | if (NumReadWrites == 1 && NumReads == 0) { |
2007 | LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a write-only loop!\n" ; } } while (false); |
2008 | CanVecMem = true; |
2009 | return; |
2010 | } |
2011 | |
2012 | // Build dependence sets and check whether we need a runtime pointer bounds |
2013 | // check. |
2014 | Accesses.buildDependenceSets(); |
2015 | |
2016 | // Find pointers with computable bounds. We are going to use this information |
2017 | // to place a runtime bound check. |
2018 | bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), |
2019 | TheLoop, SymbolicStrides); |
2020 | if (!CanDoRTIfNeeded) { |
2021 | recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds"; |
2022 | LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"; } } while (false) |
2023 | << "the array bounds.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"; } } while (false); |
2024 | CanVecMem = false; |
2025 | return; |
2026 | } |
2027 | |
2028 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n" ; } } while (false) |
2029 | dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n" ; } } while (false); |
2030 | |
2031 | CanVecMem = true; |
2032 | if (Accesses.isDependencyCheckNeeded()) { |
2033 | LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Checking memory dependencies\n" ; } } while (false); |
2034 | CanVecMem = DepChecker->areDepsSafe( |
2035 | DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); |
2036 | MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); |
2037 | |
2038 | if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { |
2039 | LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Retrying with memory checks\n" ; } } while (false); |
2040 | |
2041 | // Clear the dependency checks. We assume they are not needed. |
2042 | Accesses.resetDepChecks(*DepChecker); |
2043 | |
2044 | PtrRtChecking->reset(); |
2045 | PtrRtChecking->Need = true; |
2046 | |
2047 | auto *SE = PSE->getSE(); |
2048 | CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop, |
2049 | SymbolicStrides, true); |
2050 | |
2051 | // Check that we found the bounds for the pointer. |
2052 | if (!CanDoRTIfNeeded) { |
2053 | recordAnalysis("CantCheckMemDepsAtRunTime") |
2054 | << "cannot check memory dependencies at runtime"; |
2055 | LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Can't vectorize with memory checks\n" ; } } while (false); |
2056 | CanVecMem = false; |
2057 | return; |
2058 | } |
2059 | |
2060 | CanVecMem = true; |
2061 | } |
2062 | } |
2063 | |
2064 | if (HasConvergentOp) { |
2065 | recordAnalysis("CantInsertRuntimeCheckWithConvergent") |
2066 | << "cannot add control dependency to convergent operation"; |
2067 | LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: We can't vectorize because a runtime check " "would be needed with a convergent operation\n"; } } while ( false) |
2068 | "would be needed with a convergent operation\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: We can't vectorize because a runtime check " "would be needed with a convergent operation\n"; } } while ( false); |
2069 | CanVecMem = false; |
2070 | return; |
2071 | } |
2072 | |
2073 | if (CanVecMem) |
2074 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n" ; } } while (false) |
2075 | dbgs() << "LAA: No unsafe dependent memory operations in loop. We"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n" ; } } while (false) |
2076 | << (PtrRtChecking->Need ? "" : " don't")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n" ; } } while (false) |
2077 | << " need runtime memory checks.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n" ; } } while (false); |
2078 | else { |
2079 | recordAnalysis("UnsafeMemDep") |
2080 | << "unsafe dependent memory operations in loop. Use " |
2081 | "#pragma loop distribute(enable) to allow loop distribution " |
2082 | "to attempt to isolate the offending operations into a separate " |
2083 | "loop"; |
2084 | LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: unsafe dependent memory operations in loop\n" ; } } while (false); |
2085 | } |
2086 | } |
2087 | |
2088 | bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, |
2089 | DominatorTree *DT) { |
2090 | assert(TheLoop->contains(BB) && "Unknown block used")((TheLoop->contains(BB) && "Unknown block used") ? static_cast<void> (0) : __assert_fail ("TheLoop->contains(BB) && \"Unknown block used\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 2090, __PRETTY_FUNCTION__)); |
2091 | |
2092 | // Blocks that do not dominate the latch need predication. |
2093 | BasicBlock* Latch = TheLoop->getLoopLatch(); |
2094 | return !DT->dominates(BB, Latch); |
2095 | } |
2096 | |
2097 | OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, |
2098 | Instruction *I) { |
2099 | assert(!Report && "Multiple reports generated")((!Report && "Multiple reports generated") ? static_cast <void> (0) : __assert_fail ("!Report && \"Multiple reports generated\"" , "/build/llvm-toolchain-snapshot-12~++20210107111113+c9154e8fa377/llvm/lib/Analysis/LoopAccessAnalysis.cpp" , 2099, __PRETTY_FUNCTION__)); |
2100 | |
2101 | Value *CodeRegion = TheLoop->getHeader(); |
2102 | DebugLoc DL = TheLoop->getStartLoc(); |
2103 | |
2104 | if (I) { |
2105 | CodeRegion = I->getParent(); |
2106 | // If there is no debug location attached to the instruction, revert back to |
2107 | // using the loop's. |
2108 | if (I->getDebugLoc()) |
2109 | DL = I->getDebugLoc(); |
2110 | } |
2111 | |
2112 | Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE"loop-accesses", RemarkName, DL, |
2113 | CodeRegion); |
2114 | return *Report; |
2115 | } |
2116 | |
2117 | bool LoopAccessInfo::isUniform(Value *V) const { |
2118 | auto *SE = PSE->getSE(); |
2119 | // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is |
2120 | // never considered uniform. |
2121 | // TODO: Is this really what we want? Even without FP SCEV, we may want some |
2122 | // trivially loop-invariant FP values to be considered uniform. |
2123 | if (!SE->isSCEVable(V->getType())) |
2124 | return false; |
2125 | return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); |
2126 | } |
2127 | |
2128 | void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { |
2129 | Value *Ptr = getLoadStorePointerOperand(MemAccess); |
2130 | if (!Ptr) |
2131 | return; |
2132 | |
2133 | Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop); |
2134 | if (!Stride) |
2135 | return; |
2136 | |
2137 | LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a strided access that is a candidate for " "versioning:"; } } while (false) |
2138 | "versioning:")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a strided access that is a candidate for " "versioning:"; } } while (false); |
2139 | LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"; } } while (false ); |
2140 | |
2141 | // Avoid adding the "Stride == 1" predicate when we know that |
2142 | // Stride >= Trip-Count. Such a predicate will effectively optimize a single |
2143 | // or zero iteration loop, as Trip-Count <= Stride == 1. |
2144 | // |
2145 | // TODO: We are currently not making a very informed decision on when it is |
2146 | // beneficial to apply stride versioning. It might make more sense that the |
2147 | // users of this analysis (such as the vectorizer) will trigger it, based on |
2148 | // their specific cost considerations; For example, in cases where stride |
2149 | // versioning does not help resolving memory accesses/dependences, the |
2150 | // vectorizer should evaluate the cost of the runtime test, and the benefit |
2151 | // of various possible stride specializations, considering the alternatives |
2152 | // of using gather/scatters (if available). |
2153 | |
2154 | const SCEV *StrideExpr = PSE->getSCEV(Stride); |
2155 | const SCEV *BETakenCount = PSE->getBackedgeTakenCount(); |
2156 | |
2157 | // Match the types so we can compare the stride and the BETakenCount. |
2158 | // The Stride can be positive/negative, so we sign extend Stride; |
2159 | // The backedgeTakenCount is non-negative, so we zero extend BETakenCount. |
2160 | const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); |
2161 | uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); |
2162 | uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType()); |
2163 | const SCEV *CastedStride = StrideExpr; |
2164 | const SCEV *CastedBECount = BETakenCount; |
2165 | ScalarEvolution *SE = PSE->getSE(); |
2166 | if (BETypeSize >= StrideTypeSize) |
2167 | CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType()); |
2168 | else |
2169 | CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); |
2170 | const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); |
2171 | // Since TripCount == BackEdgeTakenCount + 1, checking: |
2172 | // "Stride >= TripCount" is equivalent to checking: |
2173 | // Stride - BETakenCount > 0 |
2174 | if (SE->isKnownPositive(StrideMinusBETaken)) { |
2175 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " "Stride==1 predicate will imply that the loop executes " "at most once.\n" ; } } while (false) |
2176 | dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " "Stride==1 predicate will imply that the loop executes " "at most once.\n" ; } } while (false) |
2177 | "Stride==1 predicate will imply that the loop executes "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " "Stride==1 predicate will imply that the loop executes " "at most once.\n" ; } } while (false) |
2178 | "at most once.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Stride>=TripCount; No point in versioning as the " "Stride==1 predicate will imply that the loop executes " "at most once.\n" ; } } while (false); |
2179 | return; |
2180 | } |
2181 | LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("loop-accesses")) { dbgs() << "LAA: Found a strided access that we can version." ; } } while (false); |
2182 | |
2183 | SymbolicStrides[Ptr] = Stride; |
2184 | StrideSet.insert(Stride); |
2185 | } |
2186 | |
// Construct the analysis result for loop \p L: set up the predicated SCEV,
// runtime pointer checking and dependence checking helpers, initialize all
// result fields to their "not vectorizable / nothing found" defaults, and —
// when the loop shape is supported — run the full memory access analysis.
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI)
    : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
      PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
      NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
      HasConvergentOp(false),
      HasDependenceInvolvingLoopInvariantAddress(false) {
  // Only analyze loops the analysis can handle (innermost, single backedge,
  // computable exit count); otherwise CanVecMem stays false.
  if (canAnalyzeLoop())
    analyzeLoop(AA, LI, TLI, DT);
}
2199 | |
2200 | void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { |
2201 | if (CanVecMem) { |
2202 | OS.indent(Depth) << "Memory dependences are safe"; |
2203 | if (MaxSafeDepDistBytes != -1ULL) |
2204 | OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes |
2205 | << " bytes"; |
2206 | if (PtrRtChecking->Need) |
2207 | OS << " with run-time checks"; |
2208 | OS << "\n"; |
2209 | } |
2210 | |
2211 | if (HasConvergentOp) |
2212 | OS.indent(Depth) << "Has convergent operation in loop\n"; |
2213 | |
2214 | if (Report) |
2215 | OS.indent(Depth) << "Report: " << Report->getMsg() << "\n"; |
2216 | |
2217 | if (auto *Dependences = DepChecker->getDependences()) { |
2218 | OS.indent(Depth) << "Dependences:\n"; |
2219 | for (auto &Dep : *Dependences) { |
2220 | Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions()); |
2221 | OS << "\n"; |
2222 | } |
2223 | } else |
2224 | OS.indent(Depth) << "Too many dependences, not recorded\n"; |
2225 | |
2226 | // List the pair of accesses need run-time checks to prove independence. |
2227 | PtrRtChecking->print(OS, Depth); |
2228 | OS << "\n"; |
2229 | |
2230 | OS.indent(Depth) << "Non vectorizable stores to invariant address were " |
2231 | << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ") |
2232 | << "found in loop.\n"; |
2233 | |
2234 | OS.indent(Depth) << "SCEV assumptions:\n"; |
2235 | PSE->getUnionPredicate().print(OS, Depth); |
2236 | |
2237 | OS << "\n"; |
2238 | |
2239 | OS.indent(Depth) << "Expressions re-written:\n"; |
2240 | PSE->print(OS, Depth); |
2241 | } |
2242 | |
/// Legacy pass-manager wrapper constructor: registers this pass (and its
/// dependencies) with the global pass registry.
LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
}
2246 | |
2247 | const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { |
2248 | auto &LAI = LoopAccessInfoMap[L]; |
2249 | |
2250 | if (!LAI) |
2251 | LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI); |
2252 | |
2253 | return *LAI.get(); |
2254 | } |
2255 | |
2256 | void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const { |
2257 | LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this); |
2258 | |
2259 | for (Loop *TopLevelLoop : *LI) |
2260 | for (Loop *L : depth_first(TopLevelLoop)) { |
2261 | OS.indent(2) << L->getHeader()->getName() << ":\n"; |
2262 | auto &LAI = LAA.getInfo(L); |
2263 | LAI.print(OS, 4); |
2264 | } |
2265 | } |
2266 | |
2267 | bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) { |
2268 | SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); |
2269 | auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); |
2270 | TLI = TLIP ? &TLIP->getTLI(F) : nullptr; |
2271 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); |
2272 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); |
2273 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); |
2274 | |
2275 | return false; |
2276 | } |
2277 | |
/// Declare the analyses this pass requires (SCEV, AA, dominators, loop info)
/// and note that, being an analysis, it preserves everything.
void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();

  AU.setPreservesAll();
}
2286 | |
// Static pass identification data for the legacy pass manager; the address
// of ID (not its value) uniquely identifies the pass.
char LoopAccessLegacyAnalysis::ID = 0;
static const char laa_name[] = "Loop Access Analysis";
// Command-line / debug-type name used in the INITIALIZE_PASS macros below.
#define LAA_NAME "loop-accesses"
2290 | |
// Register the legacy pass with the PassRegistry, declaring its analysis
// dependencies so the pass manager can schedule them first. The final
// `true` marks this as an analysis (CFG-preserving) pass.
INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2297 | |
// Unique key identifying this analysis to the new pass manager.
AnalysisKey LoopAccessAnalysis::Key;

/// New pass-manager entry point: compute LoopAccessInfo for \p L using the
/// standard per-loop analysis results. Unlike the legacy pass, the result
/// is cached by the LoopAnalysisManager, not by this class.
LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                       LoopStandardAnalysisResults &AR) {
  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
}
2304 | |
namespace llvm {

/// Factory for the legacy loop-access analysis pass; caller takes ownership
/// (the legacy pass manager deletes passes it is handed).
Pass *createLAAPass() {
  return new LoopAccessLegacyAnalysis();
}

} // end namespace llvm