//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i += 4)
///        A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI != PtrToStride.end()) {
    Value *StrideVal = SI->second;

    // Strip casts.
    StrideVal = stripIntegerCast(StrideVal);

    ScalarEvolution *SE = PSE.getSE();
    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
    const auto *CT =
        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

    PSE.addPredicate(*SE->getEqualPredicate(U, CT));
    auto *Expr = PSE.getSCEV(Ptr);

    LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                      << " by: " << *Expr << "\n");
    return Expr;
  }

  // Otherwise, just return the SCEV of the original pointer.
  return OrigSCEV;
}
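
// Illustrative sketch (not part of the original source): for a loop such as
//   for (i = 0; i < N; ++i)
//     A[i * Stride] = ...;
// the pointer SCEV {%A,+,(4 * %Stride)} (assuming 4-byte elements) is, under
// the added predicate "%Stride == 1", rewritten to {%A,+,4}, i.e. a unit
// stride the vectorizer can reason about; the loop can later be versioned on
// that predicate (see EnableMemAccessVersioning above).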

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
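///
/// For example (illustrative, assuming a 4-byte element type and a positive
/// step): with A = %base, Step = 4 and N = 100, B = %base + 400, so
/// Start = %base and End = %base + 404; the extra SizeOfElt = 4 makes the
/// interval cover the final access itself.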
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp))
    ScStart = ScEnd = Sc;
  else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
    // Add the size of the pointed element to ScEnd.
    unsigned EltSize =
        Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
    const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
  }

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}

SmallVector<RuntimePointerChecking::PointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<PointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
                                           const CheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
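///
/// For example (illustrative): with I = (%n + 4) and J = %n, Diff = J - I is
/// the negative constant -4, so J is the minimum; if Diff is not a constant
/// (e.g. J - I = %m), no ordering can be established and nullptr is returned.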
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
  const SCEV *Start = RtCheck.Pointers[Index].Start;
  const SCEV *End = RtCheck.Pointers[Index].End;

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates' equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  //   for (i = 0; i < 1000; ++i)
  //     a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(CheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<CheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      unsigned Pointer = PositionMap[MI->getPointer()];
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (CheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable. If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(CheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
                 LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no-alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride,
                            bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// Go over all memory accesses and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  const DataLayout &DL;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}
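
// Illustrative examples (not part of the original source): inside the loop,
// &A[i] yields the affine AddRec {%A,+,4} (assuming 4-byte elements) and is
// accepted, whereas &A[i * i] yields a non-affine recurrence and is rejected.
// With \p Assume set, PSE.getAsAddRec may additionally form an AddRec by
// adding run-time predicates, e.g. for index expressions that might wrap.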

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool NeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 1;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    for (auto A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
      MemAccessInfo Access(Ptr, IsWrite);

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;

      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
                                RunningDepId, ASId, ShouldCheckWrap, false)) {
        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
        Retries.push_back(Access);
        CanDoAliasSetRT = false;
      }
    }

    // If we have at least two writes or one write and a read then we need to
    // check them. But there is no need to check if there is only one
    // dependence set for this alias set.
    //
    // Note that this function computes CanDoRT and NeedRTCheck independently.
    // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
    // for which we couldn't find the bounds but we don't actually need to emit
    // any checks so it does not matter.
    bool NeedsAliasSetRTCheck = false;
    if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
      NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
                             (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries)
        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
                                  TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          break;
        }
    }

    CanDoRT &= CanDoAliasSetRT;
    NeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (NeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  RtCheck.Need = NeedRTCheck;

  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                                         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // nullptr never aliases; don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}
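
// Illustrative example (not part of the original source): for
//   %mul = mul nsw i32 %i, 2
//   %gep = getelementptr inbounds i32, i32* %A, i32 %mul
// the single non-constant index %mul is an NSW multiply by a constant whose
// other operand %i evaluates to an NSW AddRec on the queried loop, so the
// address is considered non-wrapping.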

/// Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                           const Loop *Lp, const ValueToValueMap &StridesMap,
                           bool Assume, bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  auto *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type "
                      << *Ptr << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(),
                           PtrTy->getAddressSpace())) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;
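
  // Worked example (illustrative, not part of the original source): for an
  // i32 access (Size = 4) with a constant SCEV step of 8 bytes, Stride = 2
  // and Rem = 0, i.e. every other element is touched; a 6-byte step would
  // leave Rem = 2 and the access is rejected above as non-strided.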

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              PtrTy->getAddressSpace()))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}

bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
                           ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
  OffValPairs.reserve(VL.size());

  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  Value *Obj0 = GetUnderlyingObject(Ptr0, DL);

  llvm::SmallSet<int64_t, 4> Offsets;
  for (auto *Ptr : VL) {
    // TODO: Outline this code as a special, more time consuming, version of
    // computeConstantDifference() function.
    if (Ptr->getType()->getPointerAddressSpace() !=
        Ptr0->getType()->getPointerAddressSpace())
      return false;
    // If a pointer refers to a different underlying object, bail - the
    // pointers are by definition incomparable.
    Value *CurrObj = GetUnderlyingObject(Ptr, DL);
    if (CurrObj != Obj0)
      return false;

    const SCEV *Scev = SE.getSCEV(Ptr);
    const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
    // The pointers may not have a constant offset from each other, or SCEV
    // may just not be smart enough to figure out they do. Regardless,
    // there's nothing we can do.
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = Diff->getAPInt().getSExtValue();
    if (!Offsets.insert(Offset).second)
      return false;
    OffValPairs.emplace_back(Offset, Ptr);
  }
  SortedIndices.clear();
  SortedIndices.resize(VL.size());
  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);

  // Sort the memory accesses and keep the order of their uses in UseOrder.
  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
    return OffValPairs[Left].first < OffValPairs[Right].first;
  });

  // Check if the order is consecutive already.
  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
        return I == SortedIndices[I];
      }))
    SortedIndices.clear();

  return true;
}
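
// Usage sketch (illustrative, not part of the original source): for
// VL = {&A[2], &A[0], &A[1]} the offsets relative to VL[0] are {0, -2, -1},
// so SortedIndices becomes {1, 2, 0}; for an already-consecutive list such
// as {&A[0], &A[1], &A[2]}, SortedIndices is cleared to signal that no
// reordering is needed.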

/// Take the address space operand from the Load/Store instruction.
/// Returns -1 if this is not a valid Load/Store instruction.
static unsigned getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I))
    return S->getPointerAddressSpace();
  return -1;
}

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB))
    return false;

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return false;

  // Make sure that A and B have the same type if required.
  if (CheckType && PtrA->getType() != PtrB->getType())
    return false;

  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));

  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  // OffsetDelta = OffsetB - OffsetA;
  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
  if (PtrA == PtrB)
    return OffsetDelta == Size;

  // Compute the necessary base pointer delta to have the necessary final delta
  // equal to the size.
  // BaseDelta = Size - OffsetDelta;
  const SCEV *SizeSCEV = SE.getConstant(Size);
  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);

  // Otherwise compute the distance with SCEV between the base pointers.
  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
  return X == PtrSCEVB;
}
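
// Usage sketch (illustrative, not part of the original source): for
//   %q = getelementptr inbounds i32, i32* %p, i64 1
//   %a = load i32, i32* %p
//   %b = load i32, i32* %q
// the accumulated constant offset delta equals the 4-byte store size of i32,
// so isConsecutiveAccess(%a, %b, DL, SE) returns true.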

MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor, store-load forwarding does not take place.
  // Positive dependences might cause trouble because vectorizing them might
  // prevent store-load forwarding, making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  // hence on your typical architecture store-load forwarding does not take
  // place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.
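  //
  // Worked example (illustrative, not part of the original source): for
  // a[i] = a[i-3] with 4-byte elements, Distance = 12; the search below
  // starts at VF = 8, and since 12 % 8 != 0 while 12 / 8 = 1 is far below
  // the 32-iteration threshold, the maximum VF drops to 4, which is below
  // 2 * TypeByteSize = 8, so a forwarding conflict is reported.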

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iterations between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >>= 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  if (Status < S)
    Status = S;
}

/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if it
/// is possible to prove statically that the dependence distance is larger
/// than the range that the accesses will travel through the execution of
/// the loop. If so, return true; false otherwise. This is useful for
/// example in loops such as the following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &BackedgeTakenCount,
                                     const SCEV &Dist, uint64_t Stride,
                                     uint64_t TypeByteSize) {

  // If we can prove that
  //      (**) |Dist| > BackedgeTakenCount * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > BackedgeTakenCount).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; This is checked elsewhere.
  // But in some cases we can prune unknown dependence distances early, and
  // even before selecting the VF, and without a runtime test, by comparing
  // the distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  //
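  // Worked example (illustrative, not part of the original source): in the
  // loop from the comment above, with 4-byte elements, Dist = 4*D and
  // Step = 4 while BackedgeTakenCount = D - 1, so
  // Dist - BackedgeTakenCount * Step = 4*D - 4*(D-1) = 4 > 0, and the
  // accesses are proven independent without a runtime test.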
  const uint64_t ByteStride = Stride * TypeByteSize;
  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSize > ProductTypeSize)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  return false;
}

/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not a multiple of the type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);

  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                    << "(Induction step: " << StrideAPtr << ")\n");
  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
                    << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();
  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
  uint64_t Stride = std::abs(StrideAPtr);
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
        isSafeDependenceDistance(DL, *(PSE.getSE()),
                                 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                 TypeByteSize))
      return Dependence::NoDep;

    LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    FoundNonConstantDistanceDependence = true;
    return Dependence::Unknown;
  }

  const APInt &Val = C->getAPInt();
  int64_t Distance = Val.getSExtValue();

  // Attempt to prove strided accesses independent.
  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
      areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
    LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
    return Dependence::NoDep;
  }

  // Negative distances are not plausible dependencies.
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    if (IsTrueDataDependence && EnableForwardingConflictDetection &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
         ATy != BTy)) {
      LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
      return Dependence::ForwardButPreventsForwarding;
    }

    LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  // Could be improved to assert type sizes are the same (i32 == float, etc).
  if (Val == 0) {
    if (ATy == BTy)
      return Dependence::Forward;
    LLVM_DEBUG(
        dbgs() << "LAA: Zero dependence difference but different types\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (ATy != BTy) {
    LLVM_DEBUG(
        dbgs()
        << "LAA: ReadWrite-Write positive dependency with different types\n");
    return Dependence::Unknown;
  }
1539 
1540  // Bail out early if passed-in parameters make vectorization not feasible.
1541  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1543  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1545  // The minimum number of iterations for a vectorized/unrolled version.
1546  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1547 
1548  // It's not vectorizable if the distance is smaller than the minimum distance
1549  // needed for a vectorized/unrolled version. Vectorizing one iteration in
1550  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1551  // TypeByteSize (no need to add the trailing gap distance).
1552  //
1553  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1554  // foo(int *A) {
1555  // int *B = (int *)((char *)A + 14);
1556  // for (i = 0 ; i < 1024 ; i += 2)
1557  // B[i] = A[i] + 1;
1558  // }
1559  //
1560  // Two accesses in memory (stride is 2):
1561  // | A[0] | | A[2] | | A[4] | | A[6] | |
1562  // | B[0] | | B[2] | | B[4] |
1563  //
1564  // Distance needed for vectorizing all iterations except the last:
1565  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1566  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1567  //
1568  // If MinNumIter is 2, the loop is vectorizable, as the minimum distance
1569  // needed is 12, which is less than the actual distance of 14.
1570  //
1571  // If MinNumIter is 4 (say a user forces the vectorization factor to be 4),
1572  // the minimum distance needed is 28, which is greater than the distance of
1573  // 14, so it is not safe to vectorize.
1574  uint64_t MinDistanceNeeded =
1575  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1576  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1577  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1578  << Distance << '\n');
1579  return Dependence::Backward;
1580  }
1581 
1582  // Unsafe if the minimum distance needed is greater than max safe distance.
1583  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1584  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1585  << MinDistanceNeeded << " bytes\n");
1586  return Dependence::Backward;
1587  }
1588 
1589  // Positive distance bigger than max vectorization factor.
1590  // FIXME: Should use the max factor instead of the max distance in bytes,
1591  // which cannot handle different types.
1592  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1593  // void foo (int *A, char *B) {
1594  // for (unsigned i = 0; i < 1024; i++) {
1595  // A[i+2] = A[i] + 1;
1596  // B[i+2] = B[i] + 1;
1597  // }
1598  // }
1599  //
1600  // This case is currently unsafe according to the max safe distance. If we
1601  // analyze the two accesses on array B, the max safe dependence distance
1602  // is 2. When we then analyze the accesses on array A, the minimum distance
1603  // needed is 8, which exceeds that limit of 2, so vectorization is forbidden.
1604  // But actually both A and B could safely be vectorized by 2 iterations.
1605  MaxSafeDepDistBytes =
1606  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1607 
1608  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1609  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1610  couldPreventStoreLoadForward(Distance, TypeByteSize))
1611  return Dependence::BackwardVectorizableButPreventsForwarding;
1612 
1613  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1614  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1615  << " with max VF = " << MaxVF << '\n');
1616  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1617  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1618  return Dependence::BackwardVectorizable;
1619 }
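 // Rough numeric sketch of the tail of isDependent above (illustrative, not
 // upstream text): for 4-byte accesses with unit stride and a constant
 // positive distance of 16 bytes, MinDistanceNeeded = 4 * 1 * (2 - 1) + 4 = 8
 // <= 16, so the dependence is BackwardVectorizable with
 // MaxSafeDepDistBytes = 16, MaxVF = 16 / (4 * 1) = 4 lanes, and
 // MaxVFInBits = 4 * 4 * 8 = 128.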
1620 
1621 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1622                                    MemAccessInfoList &CheckDeps,
1623  const ValueToValueMap &Strides) {
1624 
1625  MaxSafeDepDistBytes = -1;
1626  SmallPtrSet<MemAccessInfo, 8> Visited;
1627  for (MemAccessInfo CurAccess : CheckDeps) {
1628  if (Visited.count(CurAccess))
1629  continue;
1630 
1631  // Get the relevant memory access set.
1632    EquivalenceClasses<MemAccessInfo>::iterator I =
1633        AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1634 
1635  // Check accesses within this set.
1636    EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1637        AccessSets.member_begin(I);
1638    EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1639        AccessSets.member_end();
1640 
1641  // Check every access pair.
1642  while (AI != AE) {
1643  Visited.insert(*AI);
1644      EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
1645      while (OI != AE) {
1646  // Check every accessing instruction pair in program order.
1647  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1648  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1649  for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1650  I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1651  auto A = std::make_pair(&*AI, *I1);
1652  auto B = std::make_pair(&*OI, *I2);
1653 
1654  assert(*I1 != *I2);
1655  if (*I1 > *I2)
1656  std::swap(A, B);
1657 
1658        Dependence::DepType Type =
1659            isDependent(*A.first, A.second, *B.first, B.second, Strides);
1660  mergeInStatus(Dependence::isSafeForVectorization(Type));
1661 
1662  // Gather dependences unless we accumulated MaxDependences
1663  // dependences. In that case return as soon as we find the first
1664  // unsafe dependence. This puts a limit on this quadratic
1665  // algorithm.
1666  if (RecordDependences) {
1667  if (Type != Dependence::NoDep)
1668  Dependences.push_back(Dependence(A.second, B.second, Type));
1669 
1670  if (Dependences.size() >= MaxDependences) {
1671  RecordDependences = false;
1672  Dependences.clear();
1673  LLVM_DEBUG(dbgs()
1674  << "Too many dependences, stopped recording\n");
1675  }
1676  }
1677  if (!RecordDependences && !isSafeForVectorization())
1678  return false;
1679  }
1680  ++OI;
1681  }
1682  AI++;
1683  }
1684  }
1685 
1686  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1687  return isSafeForVectorization();
1688 }
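 // A note on cost (illustration, not upstream text): for an equivalence class
 // of n accesses, the AI/OI member iterators above enumerate n * (n - 1) / 2
 // unordered access pairs, and each pair is further expanded into all
 // instruction pairs in program order; this quadratic growth is why
 // MaxDependences caps how many dependences are recorded.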
1689 
1690 SmallVector<Instruction *, 4>
1691 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1692  MemAccessInfo Access(Ptr, isWrite);
1693  auto &IndexVector = Accesses.find(Access)->second;
1694 
1695  SmallVector<Instruction *, 4> Insts;
1696  transform(IndexVector,
1697  std::back_inserter(Insts),
1698  [&](unsigned Idx) { return this->InstMap[Idx]; });
1699  return Insts;
1700 }
1701 
1702 const char *MemoryDepChecker::Dependence::DepName[] = {
1703  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1704  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1705 
1706 void MemoryDepChecker::Dependence::print(
1707     raw_ostream &OS, unsigned Depth,
1708  const SmallVectorImpl<Instruction *> &Instrs) const {
1709  OS.indent(Depth) << DepName[Type] << ":\n";
1710  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1711  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1712 }
1713 
1714 bool LoopAccessInfo::canAnalyzeLoop() {
1715  // We need to have a loop header.
1716  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1717  << TheLoop->getHeader()->getParent()->getName() << ": "
1718  << TheLoop->getHeader()->getName() << '\n');
1719 
1720  // We can only analyze innermost loops.
1721  if (!TheLoop->empty()) {
1722  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1723  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1724  return false;
1725  }
1726 
1727  // We must have a single backedge.
1728  if (TheLoop->getNumBackEdges() != 1) {
1729  LLVM_DEBUG(
1730  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1731  recordAnalysis("CFGNotUnderstood")
1732  << "loop control flow is not understood by analyzer";
1733  return false;
1734  }
1735 
1736  // We must have a single exiting block.
1737  if (!TheLoop->getExitingBlock()) {
1738  LLVM_DEBUG(
1739  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1740  recordAnalysis("CFGNotUnderstood")
1741  << "loop control flow is not understood by analyzer";
1742  return false;
1743  }
1744 
1745  // We only handle bottom-tested loops, i.e. loops in which the condition is
1746  // checked at the end of each iteration. With that we can assume that all
1747  // instructions in the loop are executed the same number of times.
1748  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1749  LLVM_DEBUG(
1750  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1751  recordAnalysis("CFGNotUnderstood")
1752  << "loop control flow is not understood by analyzer";
1753  return false;
1754  }
1755 
1756  // ScalarEvolution needs to be able to find the exit count.
1757  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1758  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1759  recordAnalysis("CantComputeNumberOfIterations")
1760  << "could not determine number of loop iterations";
1761  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1762  return false;
1763  }
1764 
1765  return true;
1766 }
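 // For illustration (assumed example, not from the source): a loop such as
 //   for (i = 0; i < n; i++) { if (c[i]) break; a[i] = 0; }
 // has two exiting blocks (the break and the latch), so getExitingBlock()
 // returns null above and canAnalyzeLoop() bails out with CFGNotUnderstood.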
1767 
1768 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1769  const TargetLibraryInfo *TLI,
1770  DominatorTree *DT) {
1771  typedef SmallPtrSet<Value*, 16> ValueSet;
1772 
1773  // Holds the Load and Store instructions.
1774  SmallVector<LoadInst *, 16> Loads;
1775  SmallVector<StoreInst *, 16> Stores;
1776 
1777  // Holds all the different accesses in the loop.
1778  unsigned NumReads = 0;
1779  unsigned NumReadWrites = 0;
1780 
1781  bool HasComplexMemInst = false;
1782 
1783  // A runtime check is only legal to insert if there are no convergent calls.
1784  HasConvergentOp = false;
1785 
1786  PtrRtChecking->Pointers.clear();
1787  PtrRtChecking->Need = false;
1788 
1789  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1790 
1791  // For each block.
1792  for (BasicBlock *BB : TheLoop->blocks()) {
1793  // Scan the BB and collect legal loads and stores. Also detect any
1794  // convergent instructions.
1795  for (Instruction &I : *BB) {
1796  if (auto *Call = dyn_cast<CallBase>(&I)) {
1797  if (Call->isConvergent())
1798  HasConvergentOp = true;
1799  }
1800 
1801  // If this loop contains both a non-vectorizable memory instruction and a
1802  // convergent operation, there is no reason to continue the search.
1803  if (HasComplexMemInst && HasConvergentOp) {
1804  CanVecMem = false;
1805  return;
1806  }
1807 
1808  // Avoid hitting recordAnalysis multiple times.
1809  if (HasComplexMemInst)
1810  continue;
1811 
1812  // If this is a load, save it. If this instruction can read from memory
1813  // but is not a load, then we quit. Notice that we don't handle function
1814  // calls that read or write.
1815  if (I.mayReadFromMemory()) {
1816  // Many math library functions read the rounding mode. We will only
1817  // vectorize a loop if it contains known function calls that don't set
1818  // the flag. Therefore, it is safe to ignore this read from memory.
1819  auto *Call = dyn_cast<CallInst>(&I);
1820  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1821  continue;
1822 
1823  // If the function has an explicit vectorized counterpart, we can safely
1824  // assume that it can be vectorized.
1825  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1826  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1827  continue;
1828 
1829  auto *Ld = dyn_cast<LoadInst>(&I);
1830  if (!Ld) {
1831  recordAnalysis("CantVectorizeInstruction", Ld)
1832  << "instruction cannot be vectorized";
1833  HasComplexMemInst = true;
1834  continue;
1835  }
1836  if (!Ld->isSimple() && !IsAnnotatedParallel) {
1837  recordAnalysis("NonSimpleLoad", Ld)
1838  << "read with atomic ordering or volatile read";
1839  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1840  HasComplexMemInst = true;
1841  continue;
1842  }
1843  NumLoads++;
1844  Loads.push_back(Ld);
1845  DepChecker->addAccess(Ld);
1846  if (EnableMemAccessVersioning)
1847    collectStridedAccess(Ld);
1848  continue;
1849  }
1850 
1851  // Save 'store' instructions. Abort if other instructions write to memory.
1852  if (I.mayWriteToMemory()) {
1853  auto *St = dyn_cast<StoreInst>(&I);
1854  if (!St) {
1855  recordAnalysis("CantVectorizeInstruction", St)
1856  << "instruction cannot be vectorized";
1857  HasComplexMemInst = true;
1858  continue;
1859  }
1860  if (!St->isSimple() && !IsAnnotatedParallel) {
1861  recordAnalysis("NonSimpleStore", St)
1862  << "write with atomic ordering or volatile write";
1863  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1864  HasComplexMemInst = true;
1865  continue;
1866  }
1867  NumStores++;
1868  Stores.push_back(St);
1869  DepChecker->addAccess(St);
1870  if (EnableMemAccessVersioning)
1871    collectStridedAccess(St);
1872  }
1873  } // Next instr.
1874  } // Next block.
1875 
1876  if (HasComplexMemInst) {
1877  CanVecMem = false;
1878  return;
1879  }
1880 
1881  // Now we have two lists that hold the loads and the stores.
1882  // Next, we find the pointers that they use.
1883 
1884  // Check if we see any stores. If there are no stores, then we don't
1885  // care if the pointers are *restrict*.
1886  if (!Stores.size()) {
1887  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1888  CanVecMem = true;
1889  return;
1890  }
1891 
1892  MemoryDepChecker::DepCandidates DependentAccesses;
1893  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1894  TheLoop, AA, LI, DependentAccesses, *PSE);
1895 
1896  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1897  // multiple times on the same object. If the ptr is accessed twice, once
1898  // for read and once for write, it will only appear once (on the write
1899  // list). This is okay, since we are going to check for conflicts between
1900  // writes and between reads and writes, but not between reads and reads.
1901  ValueSet Seen;
1902 
1903  // Record uniform store addresses to identify if we have multiple stores
1904  // to the same address.
1905  ValueSet UniformStores;
1906 
1907  for (StoreInst *ST : Stores) {
1908  Value *Ptr = ST->getPointerOperand();
1909 
1910  if (isUniform(Ptr))
1911  HasDependenceInvolvingLoopInvariantAddress |=
1912  !UniformStores.insert(Ptr).second;
1913 
1914  // If we did *not* see this pointer before, insert it into the read-write
1915  // list. At this phase it is only a 'write' list.
1916  if (Seen.insert(Ptr).second) {
1917  ++NumReadWrites;
1918 
1919  MemoryLocation Loc = MemoryLocation::get(ST);
1920  // The TBAA metadata could have a control dependency on the predication
1921  // condition, so we cannot rely on it when determining whether or not we
1922  // need runtime pointer checks.
1923  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1924  Loc.AATags.TBAA = nullptr;
1925 
1926  Accesses.addStore(Loc);
1927  }
1928  }
1929 
1930  if (IsAnnotatedParallel) {
1931  LLVM_DEBUG(
1932  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1933  << "checks.\n");
1934  CanVecMem = true;
1935  return;
1936  }
1937 
1938  for (LoadInst *LD : Loads) {
1939  Value *Ptr = LD->getPointerOperand();
1940  // If we did *not* see this pointer before, insert it into the
1941  // read list. If we *did* see it before, then it is already in
1942  // the read-write list. This allows us to vectorize expressions
1943  // such as A[i] += x, because the address of A[i] is a read-write
1944  // pointer. This only works if the index of A[i] is consecutive.
1945  // If the index is unknown (for example A[B[i]]) then we may
1946  // read a few words, modify, and write a few words, and some of the
1947  // words may be written to the same address.
1948  bool IsReadOnlyPtr = false;
1949  if (Seen.insert(Ptr).second ||
1950  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1951  ++NumReads;
1952  IsReadOnlyPtr = true;
1953  }
1954 
1955  // See if there is an unsafe dependency between a load from a uniform address
1956  // and a store to the same uniform address.
1957  if (UniformStores.count(Ptr)) {
1958  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
1959  "load and uniform store to the same address!\n");
1960  HasDependenceInvolvingLoopInvariantAddress = true;
1961  }
1962 
1963  MemoryLocation Loc = MemoryLocation::get(LD);
1964  // The TBAA metadata could have a control dependency on the predication
1965  // condition, so we cannot rely on it when determining whether or not we
1966  // need runtime pointer checks.
1967  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1968  Loc.AATags.TBAA = nullptr;
1969 
1970  Accesses.addLoad(Loc, IsReadOnlyPtr);
1971  }
1972 
1973  // If we write (or read-write) to a single destination and there are no
1974  // other reads in this loop, then it is safe to vectorize.
1975  if (NumReadWrites == 1 && NumReads == 0) {
1976  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1977  CanVecMem = true;
1978  return;
1979  }
1980 
1981  // Build dependence sets and check whether we need a runtime pointer bounds
1982  // check.
1983  Accesses.buildDependenceSets();
1984 
1985  // Find pointers with computable bounds. We are going to use this information
1986  // to place a runtime bound check.
1987  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1988  TheLoop, SymbolicStrides);
1989  if (!CanDoRTIfNeeded) {
1990  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1991  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1992  << "the array bounds.\n");
1993  CanVecMem = false;
1994  return;
1995  }
1996 
1997  LLVM_DEBUG(
1998  dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
1999 
2000  CanVecMem = true;
2001  if (Accesses.isDependencyCheckNeeded()) {
2002  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2003  CanVecMem = DepChecker->areDepsSafe(
2004  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2005  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2006 
2007  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2008  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2009 
2010  // Clear the dependency checks. We assume they are not needed.
2011  Accesses.resetDepChecks(*DepChecker);
2012 
2013  PtrRtChecking->reset();
2014  PtrRtChecking->Need = true;
2015 
2016  auto *SE = PSE->getSE();
2017  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
2018  SymbolicStrides, true);
2019 
2020  // Check that we found the bounds for the pointer.
2021  if (!CanDoRTIfNeeded) {
2022  recordAnalysis("CantCheckMemDepsAtRunTime")
2023  << "cannot check memory dependencies at runtime";
2024  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2025  CanVecMem = false;
2026  return;
2027  }
2028 
2029  CanVecMem = true;
2030  }
2031  }
2032 
2033  if (HasConvergentOp) {
2034  recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2035  << "cannot add control dependency to convergent operation";
2036  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2037  "would be needed with a convergent operation\n");
2038  CanVecMem = false;
2039  return;
2040  }
2041 
2042  if (CanVecMem)
2043  LLVM_DEBUG(
2044  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2045  << (PtrRtChecking->Need ? "" : " don't")
2046  << " need runtime memory checks.\n");
2047  else {
2048  recordAnalysis("UnsafeMemDep")
2049  << "unsafe dependent memory operations in loop. Use "
2050  "#pragma loop distribute(enable) to allow loop distribution "
2051  "to attempt to isolate the offending operations into a separate "
2052  "loop";
2053  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2054  }
2055 }
2056 
2057 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2058                                            DominatorTree *DT) {
2059  assert(TheLoop->contains(BB) && "Unknown block used");
2060 
2061  // Blocks that do not dominate the latch need predication.
2062  BasicBlock* Latch = TheLoop->getLoopLatch();
2063  return !DT->dominates(BB, Latch);
2064 }
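 // Illustration (assumed CFG, not upstream text): in a loop body of the form
 //   if (c) { store ... }
 // the conditional store block does not dominate the latch, so it needs
 // predication; the callers above therefore drop the access's TBAA tag, since
 // the metadata may itself be control-dependent on the predication condition.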
2065 
2066 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2067  Instruction *I) {
2068  assert(!Report && "Multiple reports generated");
2069 
2070  Value *CodeRegion = TheLoop->getHeader();
2071  DebugLoc DL = TheLoop->getStartLoc();
2072 
2073  if (I) {
2074  CodeRegion = I->getParent();
2075  // If there is no debug location attached to the instruction, fall back to
2076  // using the loop's.
2077  if (I->getDebugLoc())
2078  DL = I->getDebugLoc();
2079  }
2080 
2081  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2082  CodeRegion);
2083  return *Report;
2084 }
2085 
2086 bool LoopAccessInfo::isUniform(Value *V) const {
2087  auto *SE = PSE->getSE();
2088  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2089  // never considered uniform.
2090  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2091  // trivially loop-invariant FP values to be considered uniform.
2092  if (!SE->isSCEVable(V->getType()))
2093  return false;
2094  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2095 }
2096 
2097 // FIXME: this function is currently a duplicate of the one in
2098 // LoopVectorize.cpp.
2099 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2100                                  Instruction *Loc) {
2101  if (FirstInst)
2102  return FirstInst;
2103  if (Instruction *I = dyn_cast<Instruction>(V))
2104  return I->getParent() == Loc->getParent() ? I : nullptr;
2105  return nullptr;
2106 }
2107 
2108 namespace {
2109 
2110 /// IR Values for the lower and upper bounds of a pointer evolution. We
2111 /// need to use value-handles because SCEV expansion can invalidate previously
2112 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
2113 /// a previous one.
2114 struct PointerBounds {
2115  TrackingVH<Value> Start;
2116  TrackingVH<Value> End;
2117 };
2118 
2119 } // end anonymous namespace
2120 
2121 /// Expand code for the lower and upper bound of the pointer group \p CG
2122 /// in \p TheLoop. \return the values for the bounds.
2123 static PointerBounds
2124 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2125              Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2126  const RuntimePointerChecking &PtrRtChecking) {
2127  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2128  const SCEV *Sc = SE->getSCEV(Ptr);
2129 
2130  unsigned AS = Ptr->getType()->getPointerAddressSpace();
2131  LLVMContext &Ctx = Loc->getContext();
2132 
2133  // Use this type for pointer arithmetic.
2134  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2135 
2136  if (SE->isLoopInvariant(Sc, TheLoop)) {
2137  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
2138  << *Ptr << "\n");
2139  // Ptr could be in the loop body. If so, expand a new one at the correct
2140  // location.
2141  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2142  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2143  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2144  : Ptr;
2145  // We must return a half-open range, which means incrementing Sc.
2146  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2147  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2148  return {NewPtr, NewPtrPlusOne};
2149  } else {
2150  Value *Start = nullptr, *End = nullptr;
2151  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2152  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2153  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2154  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
2155  << "\n");
2156  return {Start, End};
2157  }
2158 }
2159 
2160 /// Turns a collection of checks into a collection of expanded upper and
2161 /// lower bounds for both pointers in the check.
2162 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2163     const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2164     Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2165  const RuntimePointerChecking &PtrRtChecking) {
2166  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2167 
2168  // Here we're relying on the SCEV Expander's cache to only emit code for the
2169  // same bounds once.
2170  transform(
2171  PointerChecks, std::back_inserter(ChecksWithBounds),
2172      [&](const RuntimePointerChecking::PointerCheck &Check) {
2173        PointerBounds
2174  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2175  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2176  return std::make_pair(First, Second);
2177  });
2178 
2179  return ChecksWithBounds;
2180 }
2181 
2182 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2183  Instruction *Loc,
2184     const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2185     const {
2186  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2187  auto *SE = PSE->getSE();
2188  SCEVExpander Exp(*SE, DL, "induction");
2189  auto ExpandedChecks =
2190  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2191 
2192  LLVMContext &Ctx = Loc->getContext();
2193  Instruction *FirstInst = nullptr;
2194  IRBuilder<> ChkBuilder(Loc);
2195  // Our instructions might fold to a constant.
2196  Value *MemoryRuntimeCheck = nullptr;
2197 
2198  for (const auto &Check : ExpandedChecks) {
2199  const PointerBounds &A = Check.first, &B = Check.second;
2200  // Check if two pointers (A and B) conflict, where a conflict is computed as:
2201  // start(A) < end(B) && start(B) < end(A)  (the ranges are half-open)
2202  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2203  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2204 
2205  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2206  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2207  "Trying to bounds check pointers with different address spaces");
2208 
2209  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2210  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2211 
2212  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2213  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2214  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2215  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2216 
2217  // [A|B].Start points to the first accessed byte under base [A|B].
2218  // [A|B].End points to the last accessed byte, plus one.
2219  // There is no conflict when the intervals are disjoint:
2220  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2221  //
2222  // bound0 = (B.Start < A.End)
2223  // bound1 = (A.Start < B.End)
2224  // IsConflict = bound0 & bound1
2225  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2226  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2227  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2228  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2229  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2230  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2231  if (MemoryRuntimeCheck) {
2232  IsConflict =
2233  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2234  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2235  }
2236  MemoryRuntimeCheck = IsConflict;
2237  }
2238 
2239  if (!MemoryRuntimeCheck)
2240  return std::make_pair(nullptr, nullptr);
2241 
2242  // We have to do this trickery because the IRBuilder might fold the check to a
2243  // constant expression, in which case there is no Instruction anchored in
2244  // the block.
2245  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2246  ConstantInt::getTrue(Ctx));
2247  ChkBuilder.Insert(Check, "memcheck.conflict");
2248  FirstInst = getFirstInst(FirstInst, Check, Loc);
2249  return std::make_pair(FirstInst, Check);
2250 }
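 // Sketch of the IR emitted per pair by the loop above (value names are
 // illustrative assumptions): for groups A = [%a, %a.end) and
 // B = [%b, %b.end) it builds roughly
 //   %bound0 = icmp ult i8* %a, %b.end
 //   %bound1 = icmp ult i8* %b, %a.end
 //   %found.conflict = and i1 %bound0, %bound1
 // and OR-reduces the per-pair results into the final memcheck.conflict
 // value returned to the caller.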
2251 
2252 std::pair<Instruction *, Instruction *>
2253 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2254  if (!PtrRtChecking->Need)
2255  return std::make_pair(nullptr, nullptr);
2256 
2257  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2258 }
2259 
2260 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2261  Value *Ptr = nullptr;
2262  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2263  Ptr = LI->getPointerOperand();
2264  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2265  Ptr = SI->getPointerOperand();
2266  else
2267  return;
2268 
2269  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2270  if (!Stride)
2271  return;
2272 
2273  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2274  "versioning:");
2275  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2276 
2277  // Avoid adding the "Stride == 1" predicate when we know that
2278  // Stride >= Trip-Count. Such a predicate would effectively optimize a loop
2279  // of zero or one iteration, since Stride == 1 then implies Trip-Count <= 1.
2280  //
2281  // TODO: We are currently not making a very informed decision on when it is
2282  // beneficial to apply stride versioning. It might make more sense for the
2283  // users of this analysis (such as the vectorizer) to trigger it, based on
2284  // their specific cost considerations; for example, in cases where stride
2285  // versioning does not help resolving memory accesses/dependences, the
2286  // vectorizer should evaluate the cost of the runtime test, and the benefit
2287  // of various possible stride specializations, considering the alternatives
2288  // of using gather/scatters (if available).
2289 
2290  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2291  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2292 
2293  // Match the types so we can compare the stride and the BETakenCount.
2294  // The Stride can be positive/negative, so we sign-extend Stride;
2295  // the backedge-taken count is non-negative, so we zero-extend BETakenCount.
2296  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2297  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2298  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2299  const SCEV *CastedStride = StrideExpr;
2300  const SCEV *CastedBECount = BETakenCount;
2301  ScalarEvolution *SE = PSE->getSE();
2302  if (BETypeSize >= StrideTypeSize)
2303  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2304  else
2305  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2306  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2307  // Since TripCount == BackEdgeTakenCount + 1, checking:
2308  // "Stride >= TripCount" is equivalent to checking:
2309  // Stride - BETakenCount > 0
2310  if (SE->isKnownPositive(StrideMinusBETaken)) {
2311  LLVM_DEBUG(
2312  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2313  "Stride==1 predicate will imply that the loop executes "
2314  "at most once.\n");
2315  return;
2316  }
2317  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
2318 
2319  SymbolicStrides[Ptr] = Stride;
2320  StrideSet.insert(Stride);
2321 }
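 // Worked instance of the guard above (illustrative): if Stride equals the
 // trip count TC, then BETakenCount == TC - 1 and StrideMinusBETaken == 1 is
 // known positive, so versioning is skipped; a "Stride == 1" predicate could
 // then only hold for TC <= 1, i.e. a loop that executes at most once.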
2322 
2323 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2324                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2325  DominatorTree *DT, LoopInfo *LI)
2326  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2327  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2328  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2329  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2330  HasConvergentOp(false),
2331  HasDependenceInvolvingLoopInvariantAddress(false) {
2332  if (canAnalyzeLoop())
2333  analyzeLoop(AA, LI, TLI, DT);
2334 }
2335 
2336 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2337  if (CanVecMem) {
2338  OS.indent(Depth) << "Memory dependences are safe";
2339  if (MaxSafeDepDistBytes != -1ULL)
2340  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2341  << " bytes";
2342  if (PtrRtChecking->Need)
2343  OS << " with run-time checks";
2344  OS << "\n";
2345  }
2346 
2347  if (HasConvergentOp)
2348  OS.indent(Depth) << "Has convergent operation in loop\n";
2349 
2350  if (Report)
2351  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2352 
2353  if (auto *Dependences = DepChecker->getDependences()) {
2354  OS.indent(Depth) << "Dependences:\n";
2355  for (auto &Dep : *Dependences) {
2356  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2357  OS << "\n";
2358  }
2359  } else
2360  OS.indent(Depth) << "Too many dependences, not recorded\n";
2361 
2362  // List the pairs of accesses that need run-time checks to prove independence.
2363  PtrRtChecking->print(OS, Depth);
2364  OS << "\n";
2365 
2366  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2367  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2368  << "found in loop.\n";
2369 
2370  OS.indent(Depth) << "SCEV assumptions:\n";
2371  PSE->getUnionPredicate().print(OS, Depth);
2372 
2373  OS << "\n";
2374 
2375  OS.indent(Depth) << "Expressions re-written:\n";
2376  PSE->print(OS, Depth);
2377 }
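 // Rough shape of the report printed above for a vectorizable loop
 // (illustrative; the exact lines depend on the loop and the checks):
 //   Memory dependences are safe with run-time checks
 //   Dependences:
 //   Run-time memory checks:
 //     ...
 //   SCEV assumptions:
 //   Expressions re-written: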
2378 
2379 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2380  auto &LAI = LoopAccessInfoMap[L];
2381 
2382  if (!LAI)
2383  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2384 
2385  return *LAI.get();
2386 }
2387 
2388 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2389  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2390 
2391  for (Loop *TopLevelLoop : *LI)
2392  for (Loop *L : depth_first(TopLevelLoop)) {
2393  OS.indent(2) << L->getHeader()->getName() << ":\n";
2394  auto &LAI = LAA.getInfo(L);
2395  LAI.print(OS, 4);
2396  }
2397 }
2398 
2399 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2400  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2401  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2402  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2403  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2404  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2405  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2406 
2407  return false;
2408 }
2409 
2410 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2411  AU.addRequired<ScalarEvolutionWrapperPass>();
2412  AU.addRequired<AAResultsWrapperPass>();
2413  AU.addRequired<DominatorTreeWrapperPass>();
2414  AU.addRequired<LoopInfoWrapperPass>();
2415 
2416  AU.setPreservesAll();
2417 }
2418 
2419 char LoopAccessLegacyAnalysis::ID = 0;
2420 static const char laa_name[] = "Loop Access Analysis";
2421 #define LAA_NAME "loop-accesses"
2422 
2423 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2424 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2425 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2426 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2427 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2428 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2429 
2430 AnalysisKey LoopAccessAnalysis::Key;
2431 
2432 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2433                                        LoopStandardAnalysisResults &AR) {
2434  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2435 }
2436 
2437 namespace llvm {
2438 
2439  Pass *createLAAPass() {
2440   return new LoopAccessLegacyAnalysis();
2441  }
2442 
2443 } // end namespace llvm
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a non-constant (unknown) dependence-distance Dist between two memory accesses, that have the same stride whose absolute value is given in Stride, and that have the same type size TypeByteSize, in a loop whose takenCount is BackedgeTakenCount, check if it is possible to prove statically that the dependence distance is larger than the range that the accesses will travel through the execution of the loop.
#define LAA_NAME
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1799
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
static bool Check(DecodeStatus &Out, DecodeStatus In)
static const char laa_name[]
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
uint64_t CallInst * C
#define DEBUG_TYPE
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn&#39;t overflow by adding SCEV predicate.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:398
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:224
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
bool isBackward() const
Lexically backward dependence.
const SCEV * getConstant(ConstantInt *V)
static bool isInBoundsGep(Value *Ptr)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:657
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1932
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static constexpr LocationSize unknown()
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
void push_back(const T &Elt)
Definition: SmallVector.h:211
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1072
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:720
void reset()
Reset the state of the pointer runtime information.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1192
Pass * createLAAPass()
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:580
block Block Frequency true
An instruction for reading from memory.
Definition: Instructions.h:167
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Hexagon Common GEP
std::enable_if<!std::is_array< T >::value, std::unique_ptr< T > >::type make_unique(Args &&... args)
Constructs a new T() with the given args and returns a unique_ptr<T> which owns the object...
Definition: STLExtras.h:1437
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
static unsigned VectorizationFactor
VF as overridden by the user.
void reserve(size_type N)
Definition: SmallVector.h:369
uint64_t High
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:133
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:229
Type * getPointerElementType() const
Definition: Type.h:376
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
static const unsigned MaxVectorWidth
Maximum SIMD width.
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
This file implements a class to represent arbitrary precision integral constant values and operations...
static const char * DepName[]
String version of the types.
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:102
ConstantInt * getValue() const
Key
PAL metadata keys.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1809
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1574
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
member_iterator member_begin(iterator I) const
This node represents a polynomial recurrence on the trip count of the specified loop.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
Definition: Instructions.h:320
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
static Instruction * getFirstInst(Instruction *FirstInst, Value *V, Instruction *Loc)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1217
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
std::pair< Instruction *, Instruction * > addRuntimeChecks(Instruction *Loc) const
Add code that checks at runtime if the accessed arrays overlap.
bool isForward() const
Lexically forward dependence.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:363
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
void GetUnderlyingObjects(const Value *V, SmallVectorImpl< const Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:390
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:327
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
This analysis provides dependence information for the memory accesses of a loop.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
bool addPointer(unsigned Index)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
member_iterator member_end() const
Represent the analysis usage information of a pass.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
unsigned getAddressSpace() const
Definition: Globals.cpp:111
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:569
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
size_t size() const
Definition: SmallVector.h:52
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
Value * stripIntegerCast(Value *V)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
bool isNegative() const
Definition: Constants.h:187
DepType
The type of the dependence.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const X86InstrFMA3Group Groups[]
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:112
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we&#39;ve proved that V doesn&#39;t wrap by means of a SCEV predicate.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Type * getType() const
Return the LLVM type of this SCEV expression.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
bool isFunctionVectorizable(StringRef F, unsigned VF) const
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Provides information about what library functions are available for the current target.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction *> &Instrs) const
Print the dependence.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
Drive the analysis of memory accesses in the loop.
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:587
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1524
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
Accumulate offsets from stripInBoundsConstantOffsets().
Definition: Value.cpp:558
Class for arbitrary precision integers.
Definition: APInt.h:69
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class uses information about analyze scalars to rewrite expressions in canonical form...
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:469
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
This analysis provides dependence information for the memory accesses of a loop.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
static PointerBounds expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, const RuntimePointerChecking &PtrRtChecking)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:321
Dependece between memory access instructions.
This class represents an analyzed expression in the program.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:501
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1223
iterator end()
Definition: DenseMap.h:108
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
uint32_t Size
Definition: Profile.cpp:46
static unsigned getAddressSpaceOperand(Value *I)
Take the address space operand from the Load/Store instruction.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1199
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1274
bool empty() const
Definition: LoopInfo.h:148
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:793
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
iterator_range< df_iterator< T > > depth_first(const T &G)
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don&#39;t prevent vectorization.
typename std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition: LoopInfo.cpp:494
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
void stable_sort(R &&Range)
Definition: STLExtras.h:1316
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM Value Representation.
Definition: Value.h:72
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
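A hypothetical sketch combining the ScalarEvolution helpers listed here; SE is assumed to be a ScalarEvolution reference and A, B SCEV-analyzable Values:

  const SCEV *SA = SE.getSCEV(A);
  const SCEV *SB = SE.getSCEV(B);
  const SCEV *Min = SE.getUMinExpr(SA, SB); // umin(A, B)
  // umin(A, B) - B, built as an add of the negation.
  const SCEV *Diff = SE.getAddExpr(Min, SE.getNegativeSCEV(SB));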
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
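A short sketch, assuming DL is the module's DataLayout and Ty an llvm::Type*:

  // Bytes a store of Ty may overwrite; unlike getTypeAllocSize this
  // excludes trailing padding that exists only for alignment.
  uint64_t StoreBytes = DL.getTypeStoreSize(Ty);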
A vector that has set insertion semantics.
Definition: SetVector.h:40
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
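A hypothetical usage sketch; BB, TheLoop, and DT are assumed to come from the enclosing pass:

  if (LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT)) {
    // BB executes conditionally, so its loads and stores would have to
    // be masked (or proven safe to speculate) before vectorizing.
  }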
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1172
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
This header defines various interfaces for pass management in LLVM.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
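A hypothetical sketch, assuming LAI is a computed LoopAccessInfo and Ptr a pointer Value accessed in the loop:

  // List the stores through Ptr recorded by the dependence checker.
  for (Instruction *I :
       LAI.getDepChecker().getInstructionsForAccess(Ptr, /*isWrite=*/true))
    errs() << "store through Ptr: " << *I << "\n";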
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1244
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:158
bool sortPtrAccesses(ArrayRef< Value *> VL, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices, if reordering is required.
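A hypothetical sketch; VL, DL, and SE are assumed to be supplied by the caller:

  SmallVector<unsigned, 4> SortedIndices;
  if (sortPtrAccesses(VL, DL, SE, SortedIndices)) {
    // Success: VL[SortedIndices[I]] is the I-th access in increasing
    // address order.
  }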
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:70
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:448
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:277
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object...
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
bool Need
This flag indicates if we need to add the runtime check.
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI)
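A hypothetical construction sketch; each analysis pointer is assumed to have been fetched from the pass manager for the enclosing function:

  LoopAccessInfo LAI(L, SE, TLI, AA, DT, LI);
  if (LAI.canVectorizeMemory())
    LLVM_DEBUG(dbgs() << "loop memory accesses are safe to vectorize\n");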
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
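A self-contained sketch of the membership test:

  SmallSet<unsigned, 4> Seen;
  Seen.insert(3);
  if (Seen.count(3)) {
    // Present; SmallSet::count only ever returns 0 or 1.
  }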
void resize(size_type N)
Definition: SmallVector.h:344