LLVM 8.0.0svn
LoopAccessAnalysis.cpp
1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation -------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The implementation of the loop memory dependence analysis that was
11 // originally developed for the loop vectorizer.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstrTypes.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
59 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstdlib>
66 #include <iterator>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-accesses"
73 
75 VectorizationFactor("force-vector-width", cl::Hidden,
76  cl::desc("Sets the SIMD width. Zero is autoselect."),
79 
81 VectorizationInterleave("force-vector-interleave", cl::Hidden,
82  cl::desc("Sets the vectorization interleave count. "
83  "Zero is autoselect."),
87 
89  "runtime-memory-check-threshold", cl::Hidden,
90  cl::desc("When performing memory disambiguation checks at runtime do not "
91  "generate more than this number of comparisons (default = 8)."),
94 
95 /// The maximum number of iterations used to merge memory checks.
97  "memory-check-merge-threshold", cl::Hidden,
98  cl::desc("Maximum number of comparisons done when trying to merge "
99  "runtime memory checks. (default = 100)"),
100  cl::init(100));
101 
102 /// Maximum SIMD width.
103 const unsigned VectorizerParams::MaxVectorWidth = 64;
104 
105 /// We collect dependences up to this threshold.
106 static cl::opt<unsigned>
107  MaxDependences("max-dependences", cl::Hidden,
108  cl::desc("Maximum number of dependences collected by "
109  "loop-access analysis (default = 100)"),
110  cl::init(100));
111 
112 /// This enables versioning on the strides of symbolically striding memory
113 /// accesses in code like the following.
114 /// for (i = 0; i < N; ++i)
115 /// A[i * Stride1] += B[i * Stride2] ...
116 ///
117 /// Will be roughly translated to
118 /// if (Stride1 == 1 && Stride2 == 1) {
119 /// for (i = 0; i < N; i+=4)
120 /// A[i:i+3] += ...
121 /// } else
122 /// ...
124  "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125  cl::desc("Enable symbolic stride memory access versioning"));
126 
127 /// Enable store-to-load forwarding conflict detection. This option can
128 /// be disabled for correctness testing.
130  "store-to-load-forwarding-conflict-detection", cl::Hidden,
131  cl::desc("Enable conflict detection in loop-access analysis"),
132  cl::init(true));
133 
135  return ::VectorizationInterleave.getNumOccurrences() > 0;
136 }
137 
139  if (auto *CI = dyn_cast<CastInst>(V))
140  if (CI->getOperand(0)->getType()->isIntegerTy())
141  return CI->getOperand(0);
142  return V;
143 }
144 
146  const ValueToValueMap &PtrToStride,
147  Value *Ptr, Value *OrigPtr) {
148  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
149 
150  // If there is an entry in the map return the SCEV of the pointer with the
151  // symbolic stride replaced by one.
153  PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
154  if (SI != PtrToStride.end()) {
155  Value *StrideVal = SI->second;
156 
157  // Strip casts.
158  StrideVal = stripIntegerCast(StrideVal);
159 
160  ScalarEvolution *SE = PSE.getSE();
161  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
162  const auto *CT =
163  static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
164 
165  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
166  auto *Expr = PSE.getSCEV(Ptr);
167 
168  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
169  << " by: " << *Expr << "\n");
170  return Expr;
171  }
172 
173  // Otherwise, just return the SCEV of the original pointer.
174  return OrigSCEV;
175 }
176 
177 /// Calculate the Start and End points of a memory access.
178 /// Let's assume A is the first access and B is a memory access on the N-th
179 /// loop iteration. Then B is calculated as:
180 ///   B = A + Step*N .
181 /// The Step value may be positive or negative.
182 /// N is the calculated back-edge taken count:
183 ///   N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
184 /// The Start and End points are calculated as follows:
185 ///   Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
186 /// where SizeOfElt is the size of a single memory access in bytes.
187 ///
188 /// There is no conflict when the intervals are disjoint:
189 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
190 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
191  unsigned DepSetId, unsigned ASId,
192  const ValueToValueMap &Strides,
194  // Get the stride replaced scev.
195  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
196  ScalarEvolution *SE = PSE.getSE();
197 
198  const SCEV *ScStart;
199  const SCEV *ScEnd;
200 
201  if (SE->isLoopInvariant(Sc, Lp))
202  ScStart = ScEnd = Sc;
203  else {
204  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
205  assert(AR && "Invalid addrec expression");
206  const SCEV *Ex = PSE.getBackedgeTakenCount();
207 
208  ScStart = AR->getStart();
209  ScEnd = AR->evaluateAtIteration(Ex, *SE);
210  const SCEV *Step = AR->getStepRecurrence(*SE);
211 
212  // For expressions with negative step, the upper bound is ScStart and the
213  // lower bound is ScEnd.
214  if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
215  if (CStep->getValue()->isNegative())
216  std::swap(ScStart, ScEnd);
217  } else {
218  // Fallback case: the step is not constant, but we can still
219  // get the upper and lower bounds of the interval by using min/max
220  // expressions.
221  ScStart = SE->getUMinExpr(ScStart, ScEnd);
222  ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
223  }
224  // Add the size of the pointed element to ScEnd.
225  unsigned EltSize =
227  const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
228  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
229  }
230 
231  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
232 }
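As an illustrative sketch (not part of the original file), the interval computation described above can be modelled with plain 64-bit integers standing in for the SCEV expressions; the values of A, Step, N and EltSize below are hypothetical.

#include <algorithm>
#include <cassert>
#include <cstdint>

// Model of the [Start, End) interval built by RuntimePointerChecking::insert,
// with plain integers in place of SCEV expressions.
struct Interval {
  int64_t Start, End;
};

static Interval accessRange(int64_t A, int64_t Step, int64_t N, int64_t EltSize) {
  int64_t B = A + Step * N;               // last accessed address
  int64_t Start = std::min(A, B);         // UMIN(A, B)
  int64_t End = std::max(A, B) + EltSize; // UMAX(A, B) + SizeOfElt
  return {Start, End};
}

// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
static bool noConflict(const Interval &P1, const Interval &P2) {
  return P2.Start >= P1.End || P1.Start >= P2.End;
}

int main() {
  Interval P1 = accessRange(/*A=*/0,   /*Step=*/4, /*N=*/99, /*EltSize=*/4); // [0, 400)
  Interval P2 = accessRange(/*A=*/400, /*Step=*/4, /*N=*/99, /*EltSize=*/4); // [400, 800)
  assert(noConflict(P1, P2)); // disjoint intervals: no runtime conflict
  return 0;
}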
233 
235 RuntimePointerChecking::generateChecks() const {
237 
238  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
239  for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
240  const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
241  const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
242 
243  if (needsChecking(CGI, CGJ))
244  Checks.push_back(std::make_pair(&CGI, &CGJ));
245  }
246  }
247  return Checks;
248 }
249 
250 void RuntimePointerChecking::generateChecks(
251  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
252  assert(Checks.empty() && "Checks is not empty");
253  groupChecks(DepCands, UseDependencies);
254  Checks = generateChecks();
255 }
256 
258  const CheckingPtrGroup &N) const {
259  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
260  for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
261  if (needsChecking(M.Members[I], N.Members[J]))
262  return true;
263  return false;
264 }
265 
266 /// Compare \p I and \p J and return the minimum.
267 /// Return nullptr in case we couldn't find an answer.
268 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
269  ScalarEvolution *SE) {
270  const SCEV *Diff = SE->getMinusSCEV(J, I);
271  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
272 
273  if (!C)
274  return nullptr;
275  if (C->getValue()->isNegative())
276  return J;
277  return I;
278 }
279 
281  const SCEV *Start = RtCheck.Pointers[Index].Start;
282  const SCEV *End = RtCheck.Pointers[Index].End;
283 
284  // Compare the starts and ends with the known minimum and maximum
285  // of this set. We need to know how we compare against the min/max
286  // of the set in order to be able to emit memchecks.
287  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
288  if (!Min0)
289  return false;
290 
291  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
292  if (!Min1)
293  return false;
294 
295  // Update the low bound expression if we've found a new min value.
296  if (Min0 == Start)
297  Low = Start;
298 
299  // Update the high bound expression if we've found a new max value.
300  if (Min1 != End)
301  High = End;
302 
303  Members.push_back(Index);
304  return true;
305 }
306 
307 void RuntimePointerChecking::groupChecks(
308  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
309  // We build the groups from the dependency candidates' equivalence classes
310  // because:
311  // - We know that pointers in the same equivalence class share
312  // the same underlying object and therefore there is a chance
313  // that we can compare pointers
314  // - We wouldn't be able to merge two pointers for which we need
315  // to emit a memcheck. The classes in DepCands are already
316  // conveniently built such that no two pointers in the same
317  // class need checking against each other.
318 
319  // We use the following (greedy) algorithm to construct the groups
320  // For every pointer in the equivalence class:
321  // For each existing group:
322  // - if the difference between this pointer and the min/max bounds
323  // of the group is a constant, then make the pointer part of the
324  // group and update the min/max bounds of that group as required.
325 
326  CheckingGroups.clear();
327 
328  // If we need to check two pointers to the same underlying object
329  // with a non-constant difference, we shouldn't perform any pointer
330  // grouping with those pointers. This is because we can easily get
331  // into cases where the resulting check would return false, even when
332  // the accesses are safe.
333  //
334  // The following example shows this:
335  // for (i = 0; i < 1000; ++i)
336  // a[5000 + i * m] = a[i] + a[i + 9000]
337  //
338  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
339  // (0, 10000) which is always false. However, if m is 1, there is no
340  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
341  // us to perform an accurate check in this case.
342  //
343  // The above case requires that we have an UnknownDependence between
344  // accesses to the same underlying object. This cannot happen unless
345  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
346  // is also false. In this case we will use the fallback path and create
347  // separate checking groups for all pointers.
348 
349  // If we don't have the dependency partitions, construct a new
350  // checking pointer group for each pointer. This is also required
351  // for correctness, because in this case we can have checking between
352  // pointers to the same underlying object.
353  if (!UseDependencies) {
354  for (unsigned I = 0; I < Pointers.size(); ++I)
355  CheckingGroups.push_back(CheckingPtrGroup(I, *this));
356  return;
357  }
358 
359  unsigned TotalComparisons = 0;
360 
361  DenseMap<Value *, unsigned> PositionMap;
362  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
363  PositionMap[Pointers[Index].PointerValue] = Index;
364 
365  // We need to keep track of what pointers we've already seen so we
366  // don't process them twice.
368 
369  // Go through all equivalence classes, get the "pointer check groups"
370  // and add them to the overall solution. We use the order in which accesses
371  // appear in 'Pointers' to enforce determinism.
372  for (unsigned I = 0; I < Pointers.size(); ++I) {
373  // We've seen this pointer before, and therefore already processed
374  // its equivalence class.
375  if (Seen.count(I))
376  continue;
377 
378  MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
379  Pointers[I].IsWritePtr);
380 
382  auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
383 
384  // Because DepCands is constructed by visiting accesses in the order in
385  // which they appear in alias sets (which is deterministic) and the
386  // iteration order within an equivalence class member is only dependent on
387  // the order in which unions and insertions are performed on the
388  // equivalence class, the iteration order is deterministic.
389  for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
390  MI != ME; ++MI) {
391  unsigned Pointer = PositionMap[MI->getPointer()];
392  bool Merged = false;
393  // Mark this pointer as seen.
394  Seen.insert(Pointer);
395 
396  // Go through all the existing sets and see if we can find one
397  // which can include this pointer.
398  for (CheckingPtrGroup &Group : Groups) {
399  // Don't perform more than a certain amount of comparisons.
400  // This should limit the cost of grouping the pointers to something
401  // reasonable. If we do end up hitting this threshold, the algorithm
402  // will create separate groups for all remaining pointers.
403  if (TotalComparisons > MemoryCheckMergeThreshold)
404  break;
405 
406  TotalComparisons++;
407 
408  if (Group.addPointer(Pointer)) {
409  Merged = true;
410  break;
411  }
412  }
413 
414  if (!Merged)
415  // We couldn't add this pointer to any existing set or the threshold
416  // for the number of comparisons has been reached. Create a new group
417  // to hold the current pointer.
418  Groups.push_back(CheckingPtrGroup(Pointer, *this));
419  }
420 
421  // We've computed the grouped checks for this partition.
422  // Save the results and continue with the next one.
423  std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
424  }
425 }
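A rough standalone sketch of the greedy grouping strategy described above, assuming pointer bounds are already known either as plain integer constants or as symbolic values that cannot be compared (modelled with std::optional). All names and values here are hypothetical; this is not the LLVM implementation.

#include <algorithm>
#include <cstdint>
#include <optional>
#include <vector>

struct PtrBounds {
  std::optional<int64_t> Low, High; // nullopt models a symbolic (non-constant) bound
};

struct Group {
  std::optional<int64_t> Low, High;
  std::vector<unsigned> Members;

  // Mirrors CheckingPtrGroup::addPointer: only merge when the bounds can be
  // compared (getMinFromExprs returning nullptr maps to the nullopt case).
  bool addPointer(unsigned Idx, const PtrBounds &B) {
    if (!B.Low || !B.High || !Low || !High)
      return false;
    Low = std::min(*Low, *B.Low);
    High = std::max(*High, *B.High);
    Members.push_back(Idx);
    return true;
  }
};

static std::vector<Group> groupPointers(const std::vector<PtrBounds> &Ptrs) {
  std::vector<Group> Groups;
  for (unsigned I = 0; I < Ptrs.size(); ++I) {
    bool Merged = false;
    for (Group &G : Groups)
      if (G.addPointer(I, Ptrs[I])) { Merged = true; break; }
    if (!Merged)
      Groups.push_back({Ptrs[I].Low, Ptrs[I].High, {I}});
  }
  return Groups;
}

int main() {
  // Two pointers with constant bounds merge into one group; a pointer with a
  // symbolic bound (e.g. a start of 5000 + i * m) stays in a group of its own.
  auto Groups = groupPointers({{0, 4000}, {2000, 6000}, {std::nullopt, std::nullopt}});
  return Groups.size() == 2 ? 0 : 1;
}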
426 
428  const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
429  unsigned PtrIdx2) {
430  return (PtrToPartition[PtrIdx1] != -1 &&
431  PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
432 }
433 
434 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
435  const PointerInfo &PointerI = Pointers[I];
436  const PointerInfo &PointerJ = Pointers[J];
437 
438  // No need to check if two readonly pointers intersect.
439  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
440  return false;
441 
442  // Only need to check pointers between two different dependency sets.
443  if (PointerI.DependencySetId == PointerJ.DependencySetId)
444  return false;
445 
446  // Only need to check pointers in the same alias set.
447  if (PointerI.AliasSetId != PointerJ.AliasSetId)
448  return false;
449 
450  return true;
451 }
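A minimal standalone restatement of the three conditions above, using a plain struct in place of PointerInfo; the field names mirror the original, and the values in main are hypothetical.

#include <cassert>

struct PtrInfo {
  bool IsWritePtr;
  unsigned DependencySetId;
  unsigned AliasSetId;
};

static bool needsRuntimeCheck(const PtrInfo &I, const PtrInfo &J) {
  if (!I.IsWritePtr && !J.IsWritePtr)         // two reads never conflict
    return false;
  if (I.DependencySetId == J.DependencySetId) // same dependence set: already covered
    return false;
  if (I.AliasSetId != J.AliasSetId)           // different alias sets cannot alias
    return false;
  return true;
}

int main() {
  PtrInfo Write{true, 1, 1}, Read{false, 2, 1}, OtherAS{true, 3, 2};
  assert(needsRuntimeCheck(Write, Read));    // write vs. read, same alias set
  assert(!needsRuntimeCheck(Write, OtherAS)); // different alias sets
  return 0;
}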
452 
454  raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
455  unsigned Depth) const {
456  unsigned N = 0;
457  for (const auto &Check : Checks) {
458  const auto &First = Check.first->Members, &Second = Check.second->Members;
459 
460  OS.indent(Depth) << "Check " << N++ << ":\n";
461 
462  OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
463  for (unsigned K = 0; K < First.size(); ++K)
464  OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
465 
466  OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
467  for (unsigned K = 0; K < Second.size(); ++K)
468  OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
469  }
470 }
471 
473 
474  OS.indent(Depth) << "Run-time memory checks:\n";
475  printChecks(OS, Checks, Depth);
476 
477  OS.indent(Depth) << "Grouped accesses:\n";
478  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
479  const auto &CG = CheckingGroups[I];
480 
481  OS.indent(Depth + 2) << "Group " << &CG << ":\n";
482  OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
483  << ")\n";
484  for (unsigned J = 0; J < CG.Members.size(); ++J) {
485  OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
486  << "\n";
487  }
488  }
489 }
490 
491 namespace {
492 
493 /// Analyses memory accesses in a loop.
494 ///
495 /// Checks whether run time pointer checks are needed and builds sets for data
496 /// dependence checking.
497 class AccessAnalysis {
498 public:
499  /// Read or write access location.
500  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
501  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
502 
503  AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
506  : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
507  IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
508 
509  /// Register a load and whether it is only read from.
510  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
511  Value *Ptr = const_cast<Value*>(Loc.Ptr);
512  AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
513  Accesses.insert(MemAccessInfo(Ptr, false));
514  if (IsReadOnly)
515  ReadOnlyPtr.insert(Ptr);
516  }
517 
518  /// Register a store.
519  void addStore(MemoryLocation &Loc) {
520  Value *Ptr = const_cast<Value*>(Loc.Ptr);
521  AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
522  Accesses.insert(MemAccessInfo(Ptr, true));
523  }
524 
525  /// Check if we can emit a run-time no-alias check for \p Access.
526  ///
527  /// Returns true if we can emit a run-time no alias check for \p Access.
528  /// If we can check this access, this also adds it to a dependence set and
529  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
530  /// we will attempt to use additional run-time checks in order to get
531  /// the bounds of the pointer.
532  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
533  MemAccessInfo Access,
534  const ValueToValueMap &Strides,
535  DenseMap<Value *, unsigned> &DepSetId,
536  Loop *TheLoop, unsigned &RunningDepId,
537  unsigned ASId, bool ShouldCheckStride,
538  bool Assume);
539 
540  /// Check whether we can check the pointers at runtime for
541  /// non-intersection.
542  ///
543  /// Returns true if we need no check or if we do and we can generate them
544  /// (i.e. the pointers have computable bounds).
545  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
546  Loop *TheLoop, const ValueToValueMap &Strides,
547  bool ShouldCheckWrap = false);
548 
549  /// Goes over all memory accesses, checks whether a RT check is needed
550  /// and builds sets of dependent accesses.
551  void buildDependenceSets() {
552  processMemAccesses();
553  }
554 
555  /// Initial processing of memory accesses determined that we need to
556  /// perform dependency checking.
557  ///
558  /// Note that this can later be cleared if we retry memcheck analysis without
559  /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
560  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
561 
562  /// We decided that no dependence analysis would be used. Reset the state.
563  void resetDepChecks(MemoryDepChecker &DepChecker) {
564  CheckDeps.clear();
565  DepChecker.clearDependences();
566  }
567 
568  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
569 
570 private:
571  typedef SetVector<MemAccessInfo> PtrAccessSet;
572 
573  /// Go over all memory accesses and check whether runtime pointer checks
574  /// are needed and build sets of dependency check candidates.
575  void processMemAccesses();
576 
577  /// Set of all accesses.
578  PtrAccessSet Accesses;
579 
580  const DataLayout &DL;
581 
582  /// The loop being checked.
583  const Loop *TheLoop;
584 
585  /// List of accesses that need a further dependence check.
586  MemAccessInfoList CheckDeps;
587 
588  /// Set of pointers that are read only.
589  SmallPtrSet<Value*, 16> ReadOnlyPtr;
590 
591  /// An alias set tracker to partition the access set by underlying object and
592  /// intrinsic property (such as TBAA metadata).
593  AliasSetTracker AST;
594 
595  LoopInfo *LI;
596 
597  /// Sets of potentially dependent accesses - members of one set share an
598  /// underlying pointer. The set "CheckDeps" identifies which sets really need a
599  /// dependence check.
601 
602  /// Initial processing of memory accesses determined that we may need
603  /// to add memchecks. Perform the analysis to determine the necessary checks.
604  ///
605  /// Note that this is different from isDependencyCheckNeeded. When we retry
606  /// memcheck analysis without dependency checking
607  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
608  /// while this remains set if we have potentially dependent accesses.
609  bool IsRTCheckAnalysisNeeded;
610 
611  /// The SCEV predicate containing all the SCEV-related assumptions.
613 };
614 
615 } // end anonymous namespace
616 
617 /// Check whether a pointer can participate in a runtime bounds check.
618 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
619 /// by adding run-time checks (overflow checks) if necessary.
621  const ValueToValueMap &Strides, Value *Ptr,
622  Loop *L, bool Assume) {
623  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
624 
625  // The bounds for a loop-invariant pointer are trivial.
626  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
627  return true;
628 
629  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
630 
631  if (!AR && Assume)
632  AR = PSE.getAsAddRec(Ptr);
633 
634  if (!AR)
635  return false;
636 
637  return AR->isAffine();
638 }
639 
640 /// Check whether a pointer address cannot wrap.
642  const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
643  const SCEV *PtrScev = PSE.getSCEV(Ptr);
644  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
645  return true;
646 
647  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
648  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
649  return true;
650 
651  return false;
652 }
653 
654 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
655  MemAccessInfo Access,
656  const ValueToValueMap &StridesMap,
657  DenseMap<Value *, unsigned> &DepSetId,
658  Loop *TheLoop, unsigned &RunningDepId,
659  unsigned ASId, bool ShouldCheckWrap,
660  bool Assume) {
661  Value *Ptr = Access.getPointer();
662 
663  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
664  return false;
665 
666  // When we run after a failing dependency check we have to make sure
667  // we don't have wrapping pointers.
668  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
669  auto *Expr = PSE.getSCEV(Ptr);
670  if (!Assume || !isa<SCEVAddRecExpr>(Expr))
671  return false;
672  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
673  }
674 
675  // The id of the dependence set.
676  unsigned DepId;
677 
678  if (isDependencyCheckNeeded()) {
679  Value *Leader = DepCands.getLeaderValue(Access).getPointer();
680  unsigned &LeaderId = DepSetId[Leader];
681  if (!LeaderId)
682  LeaderId = RunningDepId++;
683  DepId = LeaderId;
684  } else
685  // Each access has its own dependence set.
686  DepId = RunningDepId++;
687 
688  bool IsWrite = Access.getInt();
689  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
690  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
691 
692  return true;
693  }
694 
695 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
696  ScalarEvolution *SE, Loop *TheLoop,
697  const ValueToValueMap &StridesMap,
698  bool ShouldCheckWrap) {
699  // Find pointers with computable bounds. We are going to use this information
700  // to place a runtime bound check.
701  bool CanDoRT = true;
702 
703  bool NeedRTCheck = false;
704  if (!IsRTCheckAnalysisNeeded) return true;
705 
706  bool IsDepCheckNeeded = isDependencyCheckNeeded();
707 
708  // We assign a consecutive id to accesses from different alias sets.
709  // Accesses between different groups don't need to be checked.
710  unsigned ASId = 1;
711  for (auto &AS : AST) {
712  int NumReadPtrChecks = 0;
713  int NumWritePtrChecks = 0;
714  bool CanDoAliasSetRT = true;
715 
716  // We assign consecutive ids to accesses from different dependence sets.
717  // Accesses within the same set don't need a runtime check.
718  unsigned RunningDepId = 1;
720 
722 
723  for (auto A : AS) {
724  Value *Ptr = A.getValue();
725  bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
726  MemAccessInfo Access(Ptr, IsWrite);
727 
728  if (IsWrite)
729  ++NumWritePtrChecks;
730  else
731  ++NumReadPtrChecks;
732 
733  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
734  RunningDepId, ASId, ShouldCheckWrap, false)) {
735  LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
736  Retries.push_back(Access);
737  CanDoAliasSetRT = false;
738  }
739  }
740 
741  // If we have at least two writes or one write and a read then we need to
742  // check them. But there is no need for checks if there is only one
743  // dependence set for this alias set.
744  //
745  // Note that this function computes CanDoRT and NeedRTCheck independently.
746  // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
747  // for which we couldn't find the bounds but we don't actually need to emit
748  // any checks so it does not matter.
749  bool NeedsAliasSetRTCheck = false;
750  if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
751  NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
752  (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
753 
754  // We need to perform run-time alias checks, but some pointers had bounds
755  // that couldn't be checked.
756  if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
757  // Reset the CanDoSetRt flag and retry all accesses that have failed.
758  // We know that we need these checks, so we can now be more aggressive
759  // and add further checks if required (overflow checks).
760  CanDoAliasSetRT = true;
761  for (auto Access : Retries)
762  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
763  TheLoop, RunningDepId, ASId,
764  ShouldCheckWrap, /*Assume=*/true)) {
765  CanDoAliasSetRT = false;
766  break;
767  }
768  }
769 
770  CanDoRT &= CanDoAliasSetRT;
771  NeedRTCheck |= NeedsAliasSetRTCheck;
772  ++ASId;
773  }
774 
775  // If the pointers that we would use for the bounds comparison have different
776  // address spaces, assume the values aren't directly comparable, so we can't
777  // use them for the runtime check. We also have to assume they could
778  // overlap. In the future there should be metadata for whether address spaces
779  // are disjoint.
780  unsigned NumPointers = RtCheck.Pointers.size();
781  for (unsigned i = 0; i < NumPointers; ++i) {
782  for (unsigned j = i + 1; j < NumPointers; ++j) {
783  // Only need to check pointers between two different dependency sets.
784  if (RtCheck.Pointers[i].DependencySetId ==
785  RtCheck.Pointers[j].DependencySetId)
786  continue;
787  // Only need to check pointers in the same alias set.
788  if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
789  continue;
790 
791  Value *PtrI = RtCheck.Pointers[i].PointerValue;
792  Value *PtrJ = RtCheck.Pointers[j].PointerValue;
793 
794  unsigned ASi = PtrI->getType()->getPointerAddressSpace();
795  unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
796  if (ASi != ASj) {
797  LLVM_DEBUG(
798  dbgs() << "LAA: Runtime check would require comparison between"
799  " different address spaces\n");
800  return false;
801  }
802  }
803  }
804 
805  if (NeedRTCheck && CanDoRT)
806  RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
807 
808  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
809  << " pointer comparisons.\n");
810 
811  RtCheck.Need = NeedRTCheck;
812 
813  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
814  if (!CanDoRTIfNeeded)
815  RtCheck.reset();
816  return CanDoRTIfNeeded;
817 }
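A small sketch of the per-alias-set decision made above, with plain parameters standing in for the analysis state; the helper name is hypothetical.

// Runtime checks are needed for an alias set when it contains at least two
// writes, or a write plus a read, unless dependence analysis already placed
// every access in a single dependence set (RunningDepId == 2).
static bool needsAliasSetRTCheck(int NumWritePtrChecks, int NumReadPtrChecks,
                                 bool IsDepCheckNeeded, bool CanDoAliasSetRT,
                                 unsigned RunningDepId) {
  if (IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)
    return false; // only one dependence set in this alias set
  return NumWritePtrChecks >= 2 ||
         (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1);
}

int main() {
  // One write and one read in the same alias set, split across two dependence
  // sets: a runtime check is required.
  return needsAliasSetRTCheck(/*Writes=*/1, /*Reads=*/1, /*DepCheckNeeded=*/true,
                              /*CanDoAliasSetRT=*/true, /*RunningDepId=*/3) ? 0 : 1;
}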
818 
819 void AccessAnalysis::processMemAccesses() {
820  // We process the set twice: first we process read-write pointers, last we
821  // process read-only pointers. This allows us to skip dependence tests for
822  // read-only pointers.
823 
824  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
825  LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
826  LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
827  LLVM_DEBUG({
828  for (auto A : Accesses)
829  dbgs() << "\t" << *A.getPointer() << " (" <<
830  (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
831  "read-only" : "read")) << ")\n";
832  });
833 
834  // The AliasSetTracker has nicely partitioned our pointers by metadata
835  // compatibility and potential for underlying-object overlap. As a result, we
836  // only need to check for potential pointer dependencies within each alias
837  // set.
838  for (auto &AS : AST) {
839  // Note that both the alias-set tracker and the alias sets themselves use
840  // linked lists internally and so the iteration order here is deterministic
841  // (matching the original instruction order within each set).
842 
843  bool SetHasWrite = false;
844 
845  // Map of pointers to last access encountered.
846  typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
847  UnderlyingObjToAccessMap ObjToLastAccess;
848 
849  // Set of accesses to check after all writes have been processed.
850  PtrAccessSet DeferredAccesses;
851 
852  // Iterate over each alias set twice, once to process read/write pointers,
853  // and then to process read-only pointers.
854  for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
855  bool UseDeferred = SetIteration > 0;
856  PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
857 
858  for (auto AV : AS) {
859  Value *Ptr = AV.getValue();
860 
861  // For a single memory access in AliasSetTracker, Accesses may contain
862  // both read and write, and they both need to be handled for CheckDeps.
863  for (auto AC : S) {
864  if (AC.getPointer() != Ptr)
865  continue;
866 
867  bool IsWrite = AC.getInt();
868 
869  // If we're using the deferred access set, then it contains only
870  // reads.
871  bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
872  if (UseDeferred && !IsReadOnlyPtr)
873  continue;
874  // Otherwise, the pointer must be in the PtrAccessSet, either as a
875  // read or a write.
876  assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
877  S.count(MemAccessInfo(Ptr, false))) &&
878  "Alias-set pointer not in the access set?");
879 
880  MemAccessInfo Access(Ptr, IsWrite);
881  DepCands.insert(Access);
882 
883  // Memorize read-only pointers for later processing and skip them in
884  // the first round (they need to be checked after we have seen all
885  // write pointers). Note: we also mark pointers that are not
886  // consecutive as "read-only" pointers (so that we check
887  // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
888  if (!UseDeferred && IsReadOnlyPtr) {
889  DeferredAccesses.insert(Access);
890  continue;
891  }
892 
893  // If this is a write - check other reads and writes for conflicts. If
894  // this is a read, only check other writes for conflicts (but only if
895  // there is no other write to the ptr - this is an optimization to
896  // catch "a[i] = a[i] + " without having to do a dependence check).
897  if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
898  CheckDeps.push_back(Access);
899  IsRTCheckAnalysisNeeded = true;
900  }
901 
902  if (IsWrite)
903  SetHasWrite = true;
904 
905  // Create sets of pointers connected by a shared alias set and
906  // underlying object.
907  typedef SmallVector<Value *, 16> ValueVector;
908  ValueVector TempObjects;
909 
910  GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
911  LLVM_DEBUG(dbgs()
912  << "Underlying objects for pointer " << *Ptr << "\n");
913  for (Value *UnderlyingObj : TempObjects) {
914  // A nullptr never aliases; don't join sets for pointers that have "null"
915  // in their UnderlyingObjects list.
916  if (isa<ConstantPointerNull>(UnderlyingObj) &&
918  TheLoop->getHeader()->getParent(),
919  UnderlyingObj->getType()->getPointerAddressSpace()))
920  continue;
921 
922  UnderlyingObjToAccessMap::iterator Prev =
923  ObjToLastAccess.find(UnderlyingObj);
924  if (Prev != ObjToLastAccess.end())
925  DepCands.unionSets(Access, Prev->second);
926 
927  ObjToLastAccess[UnderlyingObj] = Access;
928  LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
929  }
930  }
931  }
932  }
933  }
934 }
935 
936 static bool isInBoundsGep(Value *Ptr) {
937  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
938  return GEP->isInBounds();
939  return false;
940 }
941 
942 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
943 /// i.e. monotonically increasing/decreasing.
944 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
945  PredicatedScalarEvolution &PSE, const Loop *L) {
946  // FIXME: This should probably only return true for NUW.
948  return true;
949 
950  // Scalar evolution does not propagate the non-wrapping flags to values that
951  // are derived from a non-wrapping induction variable because non-wrapping
952  // could be flow-sensitive.
953  //
954  // Look through the potentially overflowing instruction to try to prove
955  // non-wrapping for the *specific* value of Ptr.
956 
957  // The arithmetic implied by an inbounds GEP can't overflow.
958  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
959  if (!GEP || !GEP->isInBounds())
960  return false;
961 
962  // Make sure there is only one non-const index and analyze that.
963  Value *NonConstIndex = nullptr;
964  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
965  if (!isa<ConstantInt>(Index)) {
966  if (NonConstIndex)
967  return false;
968  NonConstIndex = Index;
969  }
970  if (!NonConstIndex)
971  // The recurrence is on the pointer, ignore for now.
972  return false;
973 
974  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
975  // AddRec using a NSW operation.
976  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
977  if (OBO->hasNoSignedWrap() &&
978  // Assume the other operand is constant so that the AddRec can be
979  // easily found.
980  isa<ConstantInt>(OBO->getOperand(1))) {
981  auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
982 
983  if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
984  return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
985  }
986 
987  return false;
988 }
989 
990 /// Check whether the access through \p Ptr has a constant stride.
992  const Loop *Lp, const ValueToValueMap &StridesMap,
993  bool Assume, bool ShouldCheckWrap) {
994  Type *Ty = Ptr->getType();
995  assert(Ty->isPointerTy() && "Unexpected non-ptr");
996 
997  // Make sure that the pointer does not point to aggregate types.
998  auto *PtrTy = cast<PointerType>(Ty);
999  if (PtrTy->getElementType()->isAggregateType()) {
1000  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
1001  << *Ptr << "\n");
1002  return 0;
1003  }
1004 
1005  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1006 
1007  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1008  if (Assume && !AR)
1009  AR = PSE.getAsAddRec(Ptr);
1010 
1011  if (!AR) {
1012  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1013  << " SCEV: " << *PtrScev << "\n");
1014  return 0;
1015  }
1016 
1017  // The access function must stride over the innermost loop.
1018  if (Lp != AR->getLoop()) {
1019  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1020  << *Ptr << " SCEV: " << *AR << "\n");
1021  return 0;
1022  }
1023 
1024  // The address calculation must not wrap. Otherwise, a dependence could be
1025  // inverted.
1026  // An inbounds getelementptr that is an AddRec with a unit stride
1027  // cannot wrap per definition. The unit stride requirement is checked later.
1028  // A getelementptr without an inbounds attribute but with a unit stride would
1029  // have to access the pointer value "0", which is undefined behavior in
1030  // address space 0, therefore we can also vectorize this case.
1031  bool IsInBoundsGEP = isInBoundsGep(Ptr);
1032  bool IsNoWrapAddRec = !ShouldCheckWrap ||
1034  isNoWrapAddRec(Ptr, AR, PSE, Lp);
1035  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1037  PtrTy->getAddressSpace())) {
1038  if (Assume) {
1040  IsNoWrapAddRec = true;
1041  LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1042  << "LAA: Pointer: " << *Ptr << "\n"
1043  << "LAA: SCEV: " << *AR << "\n"
1044  << "LAA: Added an overflow assumption\n");
1045  } else {
1046  LLVM_DEBUG(
1047  dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1048  << *Ptr << " SCEV: " << *AR << "\n");
1049  return 0;
1050  }
1051  }
1052 
1053  // Check the step is constant.
1054  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1055 
1056  // Calculate the pointer stride and check if it is constant.
1057  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1058  if (!C) {
1059  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1060  << " SCEV: " << *AR << "\n");
1061  return 0;
1062  }
1063 
1064  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1065  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1066  const APInt &APStepVal = C->getAPInt();
1067 
1068  // Huge step value - give up.
1069  if (APStepVal.getBitWidth() > 64)
1070  return 0;
1071 
1072  int64_t StepVal = APStepVal.getSExtValue();
1073 
1074  // Strided access.
1075  int64_t Stride = StepVal / Size;
1076  int64_t Rem = StepVal % Size;
1077  if (Rem)
1078  return 0;
1079 
1080  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1081  // know we can't "wrap around the address space". In case of address space
1082  // zero we know that this won't happen without triggering undefined behavior.
1083  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1084  (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
1085  PtrTy->getAddressSpace()))) {
1086  if (Assume) {
1087  // We can avoid this case by adding a run-time check.
1088  LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1089  << "inbounds or in address space 0 may wrap:\n"
1090  << "LAA: Pointer: " << *Ptr << "\n"
1091  << "LAA: SCEV: " << *AR << "\n"
1092  << "LAA: Added an overflow assumption\n");
1094  } else
1095  return 0;
1096  }
1097 
1098  return Stride;
1099 }
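The final stride computation above reduces to integer arithmetic; here is a minimal standalone sketch with hypothetical step and element sizes.

#include <cassert>
#include <cstdint>

// The AddRec step (in bytes) must be an exact multiple of the element size;
// otherwise the access is not treated as strided (stride 0 means "give up").
static int64_t elementStride(int64_t StepValBytes, int64_t ElementSizeBytes) {
  if (StepValBytes % ElementSizeBytes)
    return 0;
  return StepValBytes / ElementSizeBytes;
}

int main() {
  assert(elementStride(16, 4) == 4);  // e.g. A[4*i] on i32: step 16 bytes, stride 4
  assert(elementStride(-4, 4) == -1); // reverse unit stride
  assert(elementStride(6, 4) == 0);   // step not a multiple of the element size
  return 0;
}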
1100 
1102  ScalarEvolution &SE,
1103  SmallVectorImpl<unsigned> &SortedIndices) {
1105  VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1106  "Expected list of pointer operands.");
1108  OffValPairs.reserve(VL.size());
1109 
1110  // Walk over the pointers, and map each of them to an offset relative to
1111  // the first pointer in the array.
1112  Value *Ptr0 = VL[0];
1113  const SCEV *Scev0 = SE.getSCEV(Ptr0);
1114  Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1115 
1117  for (auto *Ptr : VL) {
1118  // TODO: Outline this code as a special, more time consuming, version of
1119  // computeConstantDifference() function.
1120  if (Ptr->getType()->getPointerAddressSpace() !=
1121  Ptr0->getType()->getPointerAddressSpace())
1122  return false;
1123  // If a pointer refers to a different underlying object, bail - the
1124  // pointers are by definition incomparable.
1125  Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1126  if (CurrObj != Obj0)
1127  return false;
1128 
1129  const SCEV *Scev = SE.getSCEV(Ptr);
1130  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
1131  // The pointers may not have a constant offset from each other, or SCEV
1132  // may just not be smart enough to figure out they do. Regardless,
1133  // there's nothing we can do.
1134  if (!Diff)
1135  return false;
1136 
1137  // Check if the pointer with the same offset is found.
1138  int64_t Offset = Diff->getAPInt().getSExtValue();
1139  if (!Offsets.insert(Offset).second)
1140  return false;
1141  OffValPairs.emplace_back(Offset, Ptr);
1142  }
1143  SortedIndices.clear();
1144  SortedIndices.resize(VL.size());
1145  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
1146 
1147  // Sort the memory accesses and keep the order of their uses in UseOrder.
1148  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
1149  [&OffValPairs](unsigned Left, unsigned Right) {
1150  return OffValPairs[Left].first < OffValPairs[Right].first;
1151  });
1152 
1153  // Check if the order is consecutive already.
1154  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
1155  return I == SortedIndices[I];
1156  }))
1157  SortedIndices.clear();
1158 
1159  return true;
1160 }
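A standalone sketch of the sorting step above, assuming the byte offsets relative to the first pointer have already been computed as plain integers (the real code derives them from SCEV differences). The offsets in main are hypothetical.

#include <algorithm>
#include <cstdint>
#include <numeric>
#include <vector>

static std::vector<unsigned> sortedOrder(const std::vector<int64_t> &Offsets) {
  std::vector<unsigned> Idx(Offsets.size());
  std::iota(Idx.begin(), Idx.end(), 0);
  std::stable_sort(Idx.begin(), Idx.end(),
                   [&](unsigned L, unsigned R) { return Offsets[L] < Offsets[R]; });
  // Mirrors the final step: an already-sorted input yields an empty result.
  bool AlreadySorted = true;
  for (unsigned I = 0; I < Idx.size(); ++I)
    if (Idx[I] != I)
      AlreadySorted = false;
  if (AlreadySorted)
    Idx.clear();
  return Idx;
}

int main() {
  // Loads at byte offsets 0, 8 and 4 from the first pointer: sorted order is {0, 2, 1}.
  auto Order = sortedOrder({0, 8, 4});
  return (Order == std::vector<unsigned>{0, 2, 1}) ? 0 : 1;
}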
1161 
1162 /// Take the address space operand from the Load/Store instruction.
1163 /// Returns -1 if this is not a valid Load/Store instruction.
1164 static unsigned getAddressSpaceOperand(Value *I) {
1165  if (LoadInst *L = dyn_cast<LoadInst>(I))
1166  return L->getPointerAddressSpace();
1167  if (StoreInst *S = dyn_cast<StoreInst>(I))
1168  return S->getPointerAddressSpace();
1169  return -1;
1170 }
1171 
1172 /// Returns true if the memory operations \p A and \p B are consecutive.
1174  ScalarEvolution &SE, bool CheckType) {
1175  Value *PtrA = getLoadStorePointerOperand(A);
1176  Value *PtrB = getLoadStorePointerOperand(B);
1177  unsigned ASA = getAddressSpaceOperand(A);
1178  unsigned ASB = getAddressSpaceOperand(B);
1179 
1180  // Check that the address spaces match and that the pointers are valid.
1181  if (!PtrA || !PtrB || (ASA != ASB))
1182  return false;
1183 
1184  // Make sure that A and B are different pointers.
1185  if (PtrA == PtrB)
1186  return false;
1187 
1188  // Make sure that A and B have the same type if required.
1189  if (CheckType && PtrA->getType() != PtrB->getType())
1190  return false;
1191 
1192  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1193  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1194  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
1195 
1196  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1197  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1198  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1199 
1200  // OffsetDelta = OffsetB - OffsetA;
1201  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1202  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1203  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1204  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
1205  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
1206  // Check if they are based on the same pointer. That makes the offsets
1207  // sufficient.
1208  if (PtrA == PtrB)
1209  return OffsetDelta == Size;
1210 
1211  // Compute the necessary base pointer delta to have the necessary final delta
1212  // equal to the size.
1213  // BaseDelta = Size - OffsetDelta;
1214  const SCEV *SizeSCEV = SE.getConstant(Size);
1215  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1216 
1217  // Otherwise compute the distance with SCEV between the base pointers.
1218  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1219  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1220  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1221  return X == PtrSCEVB;
1222 }
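For accesses that strip down to the same base pointer, the test above reduces to comparing the offset difference with the element store size; a minimal sketch with hypothetical values follows.

#include <cassert>
#include <cstdint>

// Two accesses rooted at the same base are consecutive when
// OffsetB - OffsetA equals the store size of the element type.
static bool consecutiveSameBase(int64_t OffsetA, int64_t OffsetB, int64_t StoreSize) {
  return OffsetB - OffsetA == StoreSize;
}

int main() {
  assert(consecutiveSameBase(0, 4, 4));  // A[0] followed by A[1] on i32
  assert(!consecutiveSameBase(0, 8, 4)); // a gap of one element
  return 0;
}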
1223 
1225  switch (Type) {
1226  case NoDep:
1227  case Forward:
1228  case BackwardVectorizable:
1229  return true;
1230 
1231  case Unknown:
1232  case ForwardButPreventsForwarding:
1233  case Backward:
1234  case BackwardVectorizableButPreventsForwarding:
1235  return false;
1236  }
1237  llvm_unreachable("unexpected DepType!");
1238 }
1239 
1241  switch (Type) {
1242  case NoDep:
1243  case Forward:
1244  case ForwardButPreventsForwarding:
1245  case Unknown:
1246  return false;
1247 
1248  case BackwardVectorizable:
1249  case Backward:
1250  case BackwardVectorizableButPreventsForwarding:
1251  return true;
1252  }
1253  llvm_unreachable("unexpected DepType!");
1254 }
1255 
1257  return isBackward() || Type == Unknown;
1258 }
1259 
1261  switch (Type) {
1262  case Forward:
1263  case ForwardButPreventsForwarding:
1264  return true;
1265 
1266  case NoDep:
1267  case Unknown:
1268  case BackwardVectorizable:
1269  case Backward:
1270  case BackwardVectorizableButPreventsForwarding:
1271  return false;
1272  }
1273  llvm_unreachable("unexpected DepType!");
1274 }
1275 
1276 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1277  uint64_t TypeByteSize) {
1278  // If loads occur at a distance that is not a multiple of a feasible vector
1279  // factor, store-load forwarding does not take place.
1280  // Positive dependences might cause trouble because vectorizing them might
1281  // prevent store-load forwarding, making vectorized code run a lot slower.
1282  //   a[i] = a[i-3] ^ a[i-8];
1283  // The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
1284  // hence on your typical architecture store-load forwarding does not take
1285  // place. Vectorizing in such cases does not make sense.
1286  // Store-load forwarding distance.
1287 
1288  // After this many iterations store-to-load forwarding conflicts should not
1289  // cause any slowdowns.
1290  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1291  // Maximum vector factor.
1292  uint64_t MaxVFWithoutSLForwardIssues = std::min(
1293  VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1294 
1295  // Compute the smallest VF at which the store and load would be misaligned.
1296  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1297  VF *= 2) {
1298  // If the number of vector iterations between the store and the load is
1299  // small, we could incur conflicts.
1300  if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1301  MaxVFWithoutSLForwardIssues = (VF >>= 1);
1302  break;
1303  }
1304  }
1305 
1306  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1307  LLVM_DEBUG(
1308  dbgs() << "LAA: Distance " << Distance
1309  << " that could cause a store-load forwarding conflict\n");
1310  return true;
1311  }
1312 
1313  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1314  MaxVFWithoutSLForwardIssues !=
1315  VectorizerParams::MaxVectorWidth * TypeByteSize)
1316  MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1317  return false;
1318 }
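A standalone model of the loop above that searches for the largest VF (in bytes) not hitting the forwarding-conflict window; the distance and type size in main are hypothetical.

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint64_t maxVFWithoutConflict(uint64_t Distance, uint64_t TypeByteSize,
                                     uint64_t MaxSafeDepDistBytes,
                                     uint64_t MaxVectorWidth = 64) {
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  uint64_t MaxVF = std::min(MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVF; VF *= 2) {
    // A misaligned, in-flight dependence at this VF would block forwarding.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVF = VF >> 1;
      break;
    }
  }
  return MaxVF;
}

int main() {
  // i32 load of a value stored three iterations earlier (a[i] = a[i-3] ...):
  // the distance is 12 bytes, so only a 4-byte VF avoids the conflict window.
  assert(maxVFWithoutConflict(12, 4, UINT64_MAX) == 4);
  return 0;
}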
1319 
1320 /// Given a non-constant (unknown) dependence-distance \p Dist between two
1321 /// memory accesses, that have the same stride whose absolute value is given
1322 /// in \p Stride, and that have the same type size \p TypeByteSize,
1323 /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1324 /// possible to prove statically that the dependence distance is larger
1325 /// than the range that the accesses will travel through the execution of
1326 /// the loop. If so, return true; false otherwise. This is useful for
1327 /// example in loops such as the following (PR31098):
1328 /// for (i = 0; i < D; ++i) {
1329 /// = out[i];
1330 /// out[i+D] =
1331 /// }
1333  const SCEV &BackedgeTakenCount,
1334  const SCEV &Dist, uint64_t Stride,
1335  uint64_t TypeByteSize) {
1336 
1337  // If we can prove that
1338  // (**) |Dist| > BackedgeTakenCount * Step
1339  // where Step is the absolute stride of the memory accesses in bytes,
1340  // then there is no dependence.
1341  //
1342  // Rationale:
1343  // We basically want to check if the absolute distance (|Dist/Step|)
1344  // is >= the loop iteration count (or > BackedgeTakenCount).
1345  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1346  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1347  // that the dependence distance is >= VF; This is checked elsewhere.
1348  // But in some cases we can prune unknown dependence distances early, and
1349  // even before selecting the VF, and without a runtime test, by comparing
1350  // the distance against the loop iteration count. Since the vectorized code
1351  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1352  // also guarantees that distance >= VF.
1353  //
1354  const uint64_t ByteStride = Stride * TypeByteSize;
1355  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1356  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1357 
1358  const SCEV *CastedDist = &Dist;
1359  const SCEV *CastedProduct = Product;
1360  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1361  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1362 
1363  // The dependence distance can be positive/negative, so we sign extend Dist;
1364  // The multiplication of the absolute stride in bytes and the
1365  // backedgeTakenCount is non-negative, so we zero extend Product.
1366  if (DistTypeSize > ProductTypeSize)
1367  CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1368  else
1369  CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1370 
1371  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1372  // (If so, then we have proven (**) because |Dist| >= Dist)
1373  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1374  if (SE.isKnownPositive(Minus))
1375  return true;
1376 
1377  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1378  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1379  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1380  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1381  if (SE.isKnownPositive(Minus))
1382  return true;
1383 
1384  return false;
1385 }
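A standalone sketch of test (**) above with plain integers in place of SCEVs; the loop bound and element size in main are hypothetical and mirror the PR31098-style example.

#include <cassert>
#include <cstdint>

// The dependence distance is provably safe when |Dist| exceeds
// BackedgeTakenCount * Stride * TypeByteSize.
static bool safeDependenceDistance(int64_t Dist, uint64_t BackedgeTakenCount,
                                   uint64_t Stride, uint64_t TypeByteSize) {
  const uint64_t ByteStride = Stride * TypeByteSize;
  const uint64_t Product = BackedgeTakenCount * ByteStride;          // BTC * Step
  const uint64_t AbsDist = Dist < 0 ? uint64_t(-Dist) : uint64_t(Dist);
  return AbsDist > Product;                                          // (**)
}

int main() {
  // for (i = 0; i < D; ++i) { ... = out[i]; out[i + D] = ...; } with D = 100
  // and i32 elements: Dist = 400 bytes, BTC = 99, Stride = 1.
  assert(safeDependenceDistance(400, 99, 1, 4));  // 400 > 99 * 4
  assert(!safeDependenceDistance(200, 99, 1, 4)); // 200 <= 396: cannot prove
  return 0;
}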
1386 
1387 /// Check the dependence for two accesses with the same stride \p Stride.
1388 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1389 /// bytes.
1390 ///
1391 /// \returns true if they are independent.
1392 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1393  uint64_t TypeByteSize) {
1394  assert(Stride > 1 && "The stride must be greater than 1");
1395  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1396  assert(Distance > 0 && "The distance must be non-zero");
1397 
1398  // Skip if the distance is not a multiple of the type byte size.
1399  if (Distance % TypeByteSize)
1400  return false;
1401 
1402  uint64_t ScaledDist = Distance / TypeByteSize;
1403 
1404  // No dependence if the scaled distance is not a multiple of the stride.
1405  // E.g.
1406  // for (i = 0; i < 1024 ; i += 4)
1407  // A[i+2] = A[i] + 1;
1408  //
1409  // Two accesses in memory (scaled distance is 2, stride is 4):
1410  // | A[0] | | | | A[4] | | | |
1411  // | | | A[2] | | | | A[6] | |
1412  //
1413  // E.g.
1414  // for (i = 0; i < 1024 ; i += 3)
1415  // A[i+4] = A[i] + 1;
1416  //
1417  // Two accesses in memory (scaled distance is 4, stride is 3):
1418  // | A[0] | | | A[3] | | | A[6] | | |
1419  // | | | | | A[4] | | | A[7] | |
1420  return ScaledDist % Stride;
1421 }
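The two examples in the comment above can be checked with a standalone restatement of the same arithmetic (a sketch, not the LLVM code itself).

#include <cassert>
#include <cstdint>

// Accesses are independent when the distance, scaled to elements, is not a
// multiple of the stride: the two access streams never land on each other.
static bool stridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                       uint64_t TypeByteSize) {
  if (Distance % TypeByteSize) // not a whole number of elements
    return false;
  uint64_t ScaledDist = Distance / TypeByteSize;
  return ScaledDist % Stride != 0;
}

int main() {
  // for (i = 0; i < 1024; i += 4) A[i+2] = A[i] + 1;  (i32: distance 8 bytes)
  assert(stridedAccessesIndependent(8, 4, 4));
  // for (i = 0; i < 1024; i += 3) A[i+4] = A[i] + 1;  (i32: distance 16 bytes)
  assert(stridedAccessesIndependent(16, 3, 4));
  return 0;
}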
1422 
1424 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1425  const MemAccessInfo &B, unsigned BIdx,
1426  const ValueToValueMap &Strides) {
1427  assert (AIdx < BIdx && "Must pass arguments in program order");
1428 
1429  Value *APtr = A.getPointer();
1430  Value *BPtr = B.getPointer();
1431  bool AIsWrite = A.getInt();
1432  bool BIsWrite = B.getInt();
1433 
1434  // Two reads are independent.
1435  if (!AIsWrite && !BIsWrite)
1436  return Dependence::NoDep;
1437 
1438  // We cannot check pointers in different address spaces.
1439  if (APtr->getType()->getPointerAddressSpace() !=
1440  BPtr->getType()->getPointerAddressSpace())
1441  return Dependence::Unknown;
1442 
1443  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1444  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1445 
1446  const SCEV *Src = PSE.getSCEV(APtr);
1447  const SCEV *Sink = PSE.getSCEV(BPtr);
1448 
1449  // If the induction step is negative we have to invert source and sink of the
1450  // dependence.
1451  if (StrideAPtr < 0) {
1452  std::swap(APtr, BPtr);
1453  std::swap(Src, Sink);
1454  std::swap(AIsWrite, BIsWrite);
1455  std::swap(AIdx, BIdx);
1456  std::swap(StrideAPtr, StrideBPtr);
1457  }
1458 
1459  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1460 
1461  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1462  << "(Induction step: " << StrideAPtr << ")\n");
1463  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1464  << *InstMap[BIdx] << ": " << *Dist << "\n");
1465 
1466  // Need accesses with constant stride. We don't want to vectorize
1467  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1468  // the address space.
1469  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1470  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1471  return Dependence::Unknown;
1472  }
1473 
1474  Type *ATy = APtr->getType()->getPointerElementType();
1475  Type *BTy = BPtr->getType()->getPointerElementType();
1476  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1477  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1478  uint64_t Stride = std::abs(StrideAPtr);
1479  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1480  if (!C) {
1481  if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1482  isSafeDependenceDistance(DL, *(PSE.getSE()),
1483  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1484  TypeByteSize))
1485  return Dependence::NoDep;
1486 
1487  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1488  ShouldRetryWithRuntimeCheck = true;
1489  return Dependence::Unknown;
1490  }
1491 
1492  const APInt &Val = C->getAPInt();
1493  int64_t Distance = Val.getSExtValue();
1494 
1495  // Attempt to prove strided accesses independent.
1496  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1497  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1498  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1499  return Dependence::NoDep;
1500  }
1501 
1502  // Negative distances are not plausible dependencies.
1503  if (Val.isNegative()) {
1504  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1505  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1506  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1507  ATy != BTy)) {
1508  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1509  return Dependence::ForwardButPreventsForwarding;
1510  }
1511 
1512  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1513  return Dependence::Forward;
1514  }
1515 
1516  // Write to the same location with the same size.
1517  // Could be improved to assert type sizes are the same (i32 == float, etc).
1518  if (Val == 0) {
1519  if (ATy == BTy)
1520  return Dependence::Forward;
1521  LLVM_DEBUG(
1522  dbgs() << "LAA: Zero dependence difference but different types\n");
1523  return Dependence::Unknown;
1524  }
1525 
1526  assert(Val.isStrictlyPositive() && "Expect a positive value");
1527 
1528  if (ATy != BTy) {
1529  LLVM_DEBUG(
1530  dbgs()
1531  << "LAA: ReadWrite-Write positive dependency with different types\n");
1532  return Dependence::Unknown;
1533  }
1534 
1535  // Bail out early if passed-in parameters make vectorization not feasible.
1536  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1538  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1540  // The minimum number of iterations for a vectorized/unrolled version.
1541  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1542 
1543  // It's not vectorizable if the distance is smaller than the minimum distance
1544  // needed for a vectorized/unrolled version. Vectorizing one iteration in
1545  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1546  // TypeByteSize (no need to add the last gap distance).
1547  //
1548  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1549  // foo(int *A) {
1550  // int *B = (int *)((char *)A + 14);
1551  // for (i = 0 ; i < 1024 ; i += 2)
1552  // B[i] = A[i] + 1;
1553  // }
1554  //
1555  // Two accesses in memory (stride is 2):
1556  // | A[0] | | A[2] | | A[4] | | A[6] | |
1557  // | B[0] | | B[2] | | B[4] |
1558  //
1559  // Distance needed for vectorizing iterations except the last iteration:
1560  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1561  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1562  //
1563  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1564  // 12, which is less than the distance of 14.
1565  //
1566  // If MinNumIter is 4 (say a user forces the vectorization factor to be 4),
1567  // the minimum distance needed is 28, which is greater than the distance of
1568  // 14, so it is not safe to vectorize.
1569  uint64_t MinDistanceNeeded =
1570  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1571  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1572  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1573  << Distance << '\n');
1574  return Dependence::Backward;
1575  }
1576 
1577  // Unsafe if the minimum distance needed is greater than max safe distance.
1578  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1579  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1580  << MinDistanceNeeded << " size in bytes");
1581  return Dependence::Backward;
1582  }
1583 
1584  // Positive distance bigger than max vectorization factor.
1585  // FIXME: Should use max factor instead of max distance in bytes, which
1586  // cannot handle different types.
1587  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1588  // void foo (int *A, char *B) {
1589  // for (unsigned i = 0; i < 1024; i++) {
1590  // A[i+2] = A[i] + 1;
1591  // B[i+2] = B[i] + 1;
1592  // }
1593  // }
1594  //
1595  // This case is currently unsafe according to the max safe distance. If we
1596  // analyze the two accesses on array B, the max safe dependence distance
1597  // is 2. Then we analyze the accesses on array A: the minimum distance needed
1598  // is 8, which exceeds the max safe distance of 2, so vectorization is
1599  // forbidden. But actually both A and B could be vectorized with a factor of 2.
1600  MaxSafeDepDistBytes =
1601  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1602 
1603  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1604  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1605  couldPreventStoreLoadForward(Distance, TypeByteSize))
1606  return Dependence::BackwardVectorizableButPreventsForwarding;
1607 
1608  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1609  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1610  << " with max VF = " << MaxVF << '\n');
1611  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1612  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1613  return Dependence::BackwardVectorizable;
1614 }
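The safety test above reduces to the single inequality MinDistanceNeeded <= Distance. The following standalone sketch is not part of LoopAccessAnalysis.cpp; it only re-evaluates that formula with the values used in the comment above (TypeByteSize = 4, Stride = 2, Distance = 14) to show why a forced MinNumIter of 2 is accepted while 4 is rejected.

// Illustration only: the minimum-distance inequality from isDependent(),
// evaluated for the example in the comment (B = (char *)A + 14, i += 2).
#include <cstdint>
#include <iostream>

int main() {
  const uint64_t TypeByteSize = 4; // sizeof(int) in the example
  const uint64_t Stride = 2;       // the loop advances i by 2
  const int64_t Distance = 14;     // byte distance between A[i] and B[i]

  for (unsigned MinNumIter : {2u, 4u}) {
    uint64_t MinDistanceNeeded =
        TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
    std::cout << "MinNumIter = " << MinNumIter
              << ", MinDistanceNeeded = " << MinDistanceNeeded
              << (MinDistanceNeeded <= uint64_t(Distance)
                      ? " -> backward but vectorizable\n"
                      : " -> unsafe, reported as Backward\n");
  }
  return 0;
}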
1615 
1616 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1617  MemAccessInfoList &CheckDeps,
1618  const ValueToValueMap &Strides) {
1619 
1620  MaxSafeDepDistBytes = -1;
1621  SmallPtrSet<MemAccessInfo, 8> Visited;
1622  for (MemAccessInfo CurAccess : CheckDeps) {
1623  if (Visited.count(CurAccess))
1624  continue;
1625 
1626  // Get the relevant memory access set.
1627  EquivalenceClasses<MemAccessInfo>::iterator I =
1628  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1629 
1630  // Check accesses within this set.
1631  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1632  AccessSets.member_begin(I);
1633  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1634  AccessSets.member_end();
1635 
1636  // Check every access pair.
1637  while (AI != AE) {
1638  Visited.insert(*AI);
1639  EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
1640  while (OI != AE) {
1641  // Check every accessing instruction pair in program order.
1642  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1643  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1644  for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1645  I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1646  auto A = std::make_pair(&*AI, *I1);
1647  auto B = std::make_pair(&*OI, *I2);
1648 
1649  assert(*I1 != *I2);
1650  if (*I1 > *I2)
1651  std::swap(A, B);
1652 
1653  Dependence::DepType Type =
1654  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1655  SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1656 
1657  // Gather dependences unless we accumulated MaxDependences
1658  // dependences. In that case return as soon as we find the first
1659  // unsafe dependence. This puts a limit on this quadratic
1660  // algorithm.
1661  if (RecordDependences) {
1662  if (Type != Dependence::NoDep)
1663  Dependences.push_back(Dependence(A.second, B.second, Type));
1664 
1665  if (Dependences.size() >= MaxDependences) {
1666  RecordDependences = false;
1667  Dependences.clear();
1668  LLVM_DEBUG(dbgs()
1669  << "Too many dependences, stopped recording\n");
1670  }
1671  }
1672  if (!RecordDependences && !SafeForVectorization)
1673  return false;
1674  }
1675  ++OI;
1676  }
1677  AI++;
1678  }
1679  }
1680 
1681  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1682  return SafeForVectorization;
1683 }
1684 
1685 SmallVector<Instruction *, 4>
1686 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1687  MemAccessInfo Access(Ptr, isWrite);
1688  auto &IndexVector = Accesses.find(Access)->second;
1689 
1690  SmallVector<Instruction *, 4> Insts;
1691  transform(IndexVector,
1692  std::back_inserter(Insts),
1693  [&](unsigned Idx) { return this->InstMap[Idx]; });
1694  return Insts;
1695 }
1696 
1697 const char *MemoryDepChecker::Dependence::DepName[] = {
1698  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1699  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1700 
1701 void MemoryDepChecker::Dependence::print(
1702  raw_ostream &OS, unsigned Depth,
1703  const SmallVectorImpl<Instruction *> &Instrs) const {
1704  OS.indent(Depth) << DepName[Type] << ":\n";
1705  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1706  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1707 }
1708 
1709 bool LoopAccessInfo::canAnalyzeLoop() {
1710  // We need to have a loop header.
1711  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1712  << TheLoop->getHeader()->getParent()->getName() << ": "
1713  << TheLoop->getHeader()->getName() << '\n');
1714 
1715  // We can only analyze innermost loops.
1716  if (!TheLoop->empty()) {
1717  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1718  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1719  return false;
1720  }
1721 
1722  // We must have a single backedge.
1723  if (TheLoop->getNumBackEdges() != 1) {
1724  LLVM_DEBUG(
1725  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1726  recordAnalysis("CFGNotUnderstood")
1727  << "loop control flow is not understood by analyzer";
1728  return false;
1729  }
1730 
1731  // We must have a single exiting block.
1732  if (!TheLoop->getExitingBlock()) {
1733  LLVM_DEBUG(
1734  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1735  recordAnalysis("CFGNotUnderstood")
1736  << "loop control flow is not understood by analyzer";
1737  return false;
1738  }
1739 
1740  // We only handle bottom-tested loops, i.e. loops in which the condition is
1741  // checked at the end of each iteration. With that we can assume that all
1742  // instructions in the loop are executed the same number of times.
1743  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1744  LLVM_DEBUG(
1745  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1746  recordAnalysis("CFGNotUnderstood")
1747  << "loop control flow is not understood by analyzer";
1748  return false;
1749  }
1750 
1751  // ScalarEvolution needs to be able to find the exit count.
1752  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1753  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1754  recordAnalysis("CantComputeNumberOfIterations")
1755  << "could not determine number of loop iterations";
1756  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1757  return false;
1758  }
1759 
1760  return true;
1761 }
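For reference, a hedged sketch (illustration only, not from this file) of loop shapes relative to the checks above, assuming the usual loop rotation has already produced a bottom-tested form: the first loop satisfies all the constraints, while the outer loop of the second is rejected as NotInnerMostLoop.

// Illustration only: loop shapes relative to canAnalyzeLoop()'s checks,
// assuming loop rotation has already made the loops bottom-tested.
void analyzable(int *A, int N) {
  // Innermost, single backedge, single exiting block which is also the latch.
  for (int i = 0; i < N; ++i)
    A[i] += 1;
}

void outerRejected(int *A, int N) {
  // The outer loop contains another loop, so only the inner loop can be
  // analyzed; the outer one is reported as "NotInnerMostLoop".
  for (int i = 0; i < N; ++i)
    for (int j = 0; j < N; ++j)
      A[i * N + j] += 1;
}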
1762 
1763 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1764  const TargetLibraryInfo *TLI,
1765  DominatorTree *DT) {
1766  typedef SmallPtrSet<Value*, 16> ValueSet;
1767 
1768  // Holds the Load and Store instructions.
1769  SmallVector<LoadInst *, 16> Loads;
1770  SmallVector<StoreInst *, 16> Stores;
1771 
1772  // Holds all the different accesses in the loop.
1773  unsigned NumReads = 0;
1774  unsigned NumReadWrites = 0;
1775 
1776  PtrRtChecking->Pointers.clear();
1777  PtrRtChecking->Need = false;
1778 
1779  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1780 
1781  // For each block.
1782  for (BasicBlock *BB : TheLoop->blocks()) {
1783  // Scan the BB and collect legal loads and stores.
1784  for (Instruction &I : *BB) {
1785  // If this is a load, save it. If this instruction can read from memory
1786  // but is not a load, then we quit. Notice that we don't handle function
1787  // calls that read or write.
1788  if (I.mayReadFromMemory()) {
1789  // Many math library functions read the rounding mode. We will only
1790  // vectorize a loop if it contains known function calls that don't set
1791  // the flag. Therefore, it is safe to ignore this read from memory.
1792  auto *Call = dyn_cast<CallInst>(&I);
1793  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1794  continue;
1795 
1796  // If the function has an explicit vectorized counterpart, we can safely
1797  // assume that it can be vectorized.
1798  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1799  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1800  continue;
1801 
1802  auto *Ld = dyn_cast<LoadInst>(&I);
1803  if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1804  recordAnalysis("NonSimpleLoad", Ld)
1805  << "read with atomic ordering or volatile read";
1806  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1807  CanVecMem = false;
1808  return;
1809  }
1810  NumLoads++;
1811  Loads.push_back(Ld);
1812  DepChecker->addAccess(Ld);
1813  if (EnableMemAccessVersioning)
1814  collectStridedAccess(Ld);
1815  continue;
1816  }
1817 
1818  // Save 'store' instructions. Abort if other instructions write to memory.
1819  if (I.mayWriteToMemory()) {
1820  auto *St = dyn_cast<StoreInst>(&I);
1821  if (!St) {
1822  recordAnalysis("CantVectorizeInstruction", St)
1823  << "instruction cannot be vectorized";
1824  CanVecMem = false;
1825  return;
1826  }
1827  if (!St->isSimple() && !IsAnnotatedParallel) {
1828  recordAnalysis("NonSimpleStore", St)
1829  << "write with atomic ordering or volatile write";
1830  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1831  CanVecMem = false;
1832  return;
1833  }
1834  NumStores++;
1835  Stores.push_back(St);
1836  DepChecker->addAccess(St);
1837  if (EnableMemAccessVersioning)
1838  collectStridedAccess(St);
1839  }
1840  } // Next instr.
1841  } // Next block.
1842 
1843  // Now we have two lists that hold the loads and the stores.
1844  // Next, we find the pointers that they use.
1845 
1846  // Check if we see any stores. If there are no stores, then we don't
1847  // care if the pointers are *restrict*.
1848  if (!Stores.size()) {
1849  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1850  CanVecMem = true;
1851  return;
1852  }
1853 
1854  MemoryDepChecker::DepCandidates DependentAccesses;
1855  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1856  TheLoop, AA, LI, DependentAccesses, *PSE);
1857 
1858  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1859  // multiple times on the same object. If the ptr is accessed twice, once
1860  // for read and once for write, it will only appear once (on the write
1861  // list). This is okay, since we are going to check for conflicts between
1862  // writes and between reads and writes, but not between reads and reads.
1863  ValueSet Seen;
1864 
1865  // Record uniform store addresses to identify if we have multiple stores
1866  // to the same address.
1867  ValueSet UniformStores;
1868 
1869  for (StoreInst *ST : Stores) {
1870  Value *Ptr = ST->getPointerOperand();
1871 
1872  if (isUniform(Ptr)) {
1873  // Consider multiple stores to the same uniform address as a store of a
1874  // variant value.
1875  bool MultipleStoresToUniformPtr = !UniformStores.insert(Ptr).second;
1876  HasVariantStoreToLoopInvariantAddress |=
1877  (!isUniform(ST->getValueOperand()) || MultipleStoresToUniformPtr);
1878  }
1879 
1880  // If we did *not* see this pointer before, insert it to the read-write
1881  // list. At this phase it is only a 'write' list.
1882  if (Seen.insert(Ptr).second) {
1883  ++NumReadWrites;
1884 
1885  MemoryLocation Loc = MemoryLocation::get(ST);
1886  // The TBAA metadata could have a control dependency on the predication
1887  // condition, so we cannot rely on it when determining whether or not we
1888  // need runtime pointer checks.
1889  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1890  Loc.AATags.TBAA = nullptr;
1891 
1892  Accesses.addStore(Loc);
1893  }
1894  }
1895 
1896  if (IsAnnotatedParallel) {
1897  LLVM_DEBUG(
1898  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1899  << "checks.\n");
1900  CanVecMem = true;
1901  return;
1902  }
1903 
1904  for (LoadInst *LD : Loads) {
1905  Value *Ptr = LD->getPointerOperand();
1906  // If we did *not* see this pointer before, insert it to the
1907  // read list. If we *did* see it before, then it is already in
1908  // the read-write list. This allows us to vectorize expressions
1909  // such as A[i] += x, because the address of A[i] is a read-write
1910  // pointer. This only works if the index of A[i] is consecutive.
1911  // If the address of i is unknown (for example A[B[i]]) then we may
1912  // read a few words, modify, and write a few words, and some of the
1913  // words may be written to the same address.
1914  bool IsReadOnlyPtr = false;
1915  if (Seen.insert(Ptr).second ||
1916  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1917  ++NumReads;
1918  IsReadOnlyPtr = true;
1919  }
1920 
1921  MemoryLocation Loc = MemoryLocation::get(LD);
1922  // The TBAA metadata could have a control dependency on the predication
1923  // condition, so we cannot rely on it when determining whether or not we
1924  // need runtime pointer checks.
1925  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1926  Loc.AATags.TBAA = nullptr;
1927 
1928  Accesses.addLoad(Loc, IsReadOnlyPtr);
1929  }
1930 
1931  // If we write (or read-write) to a single destination and there are no
1932  // other reads in this loop then it is safe to vectorize.
1933  if (NumReadWrites == 1 && NumReads == 0) {
1934  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1935  CanVecMem = true;
1936  return;
1937  }
1938 
1939  // Build dependence sets and check whether we need a runtime pointer bounds
1940  // check.
1941  Accesses.buildDependenceSets();
1942 
1943  // Find pointers with computable bounds. We are going to use this information
1944  // to place a runtime bound check.
1945  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1946  TheLoop, SymbolicStrides);
1947  if (!CanDoRTIfNeeded) {
1948  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1949  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1950  << "the array bounds.\n");
1951  CanVecMem = false;
1952  return;
1953  }
1954 
1955  LLVM_DEBUG(
1956  dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1957 
1958  CanVecMem = true;
1959  if (Accesses.isDependencyCheckNeeded()) {
1960  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1961  CanVecMem = DepChecker->areDepsSafe(
1962  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1963  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1964 
1965  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1966  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1967 
1968  // Clear the dependency checks. We assume they are not needed.
1969  Accesses.resetDepChecks(*DepChecker);
1970 
1971  PtrRtChecking->reset();
1972  PtrRtChecking->Need = true;
1973 
1974  auto *SE = PSE->getSE();
1975  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1976  SymbolicStrides, true);
1977 
1978  // Check that we found the bounds for the pointer.
1979  if (!CanDoRTIfNeeded) {
1980  recordAnalysis("CantCheckMemDepsAtRunTime")
1981  << "cannot check memory dependencies at runtime";
1982  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1983  CanVecMem = false;
1984  return;
1985  }
1986 
1987  CanVecMem = true;
1988  }
1989  }
1990 
1991  if (CanVecMem)
1992  LLVM_DEBUG(
1993  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
1994  << (PtrRtChecking->Need ? "" : " don't")
1995  << " need runtime memory checks.\n");
1996  else {
1997  recordAnalysis("UnsafeMemDep")
1998  << "unsafe dependent memory operations in loop. Use "
1999  "#pragma loop distribute(enable) to allow loop distribution "
2000  "to attempt to isolate the offending operations into a separate "
2001  "loop";
2002  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2003  }
2004 }
2005 
2006 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2007  DominatorTree *DT) {
2008  assert(TheLoop->contains(BB) && "Unknown block used");
2009 
2010  // Blocks that do not dominate the latch need predication.
2011  BasicBlock* Latch = TheLoop->getLoopLatch();
2012  return !DT->dominates(BB, Latch);
2013 }
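A small sketch (illustration only, not from this file) of the rule above: a conditional block inside the loop body does not dominate the latch, so its accesses are treated as predicated, which is why the callers above drop the TBAA tags for such blocks.

// Illustration only: the store below sits in a block that does not dominate
// the loop latch, so blockNeedsPredication() returns true for it.
void predicatedStore(int *A, int n) {
  for (int i = 0; i < n; ++i) {
    if (A[i] > 0) // the guarded block is skipped on some iterations
      A[i] = 0;   // predicated store
  }
}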
2014 
2015 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2016  Instruction *I) {
2017  assert(!Report && "Multiple reports generated");
2018 
2019  Value *CodeRegion = TheLoop->getHeader();
2020  DebugLoc DL = TheLoop->getStartLoc();
2021 
2022  if (I) {
2023  CodeRegion = I->getParent();
2024  // If there is no debug location attached to the instruction, fall back to
2025  // using the loop's start location.
2026  if (I->getDebugLoc())
2027  DL = I->getDebugLoc();
2028  }
2029 
2030  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2031  CodeRegion);
2032  return *Report;
2033 }
2034 
2035 bool LoopAccessInfo::isUniform(Value *V) const {
2036  auto *SE = PSE->getSE();
2037  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2038  // never considered uniform.
2039  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2040  // trivially loop-invariant FP values to be considered uniform.
2041  if (!SE->isSCEVable(V->getType()))
2042  return false;
2043  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2044 }
2045 
2046 // FIXME: this function is currently a duplicate of the one in
2047 // LoopVectorize.cpp.
2048 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2049  Instruction *Loc) {
2050  if (FirstInst)
2051  return FirstInst;
2052  if (Instruction *I = dyn_cast<Instruction>(V))
2053  return I->getParent() == Loc->getParent() ? I : nullptr;
2054  return nullptr;
2055 }
2056 
2057 namespace {
2058 
2059 /// IR Values for the lower and upper bounds of a pointer evolution. We
2060 /// need to use value-handles because SCEV expansion can invalidate previously
2061 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
2062 /// a previous one.
2063 struct PointerBounds {
2064  TrackingVH<Value> Start;
2065  TrackingVH<Value> End;
2066 };
2067 
2068 } // end anonymous namespace
2069 
2070 /// Expand code for the lower and upper bound of the pointer group \p CG
2071 /// in \p TheLoop. \return the values for the bounds.
2072 static PointerBounds
2073 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2074  Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2075  const RuntimePointerChecking &PtrRtChecking) {
2076  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2077  const SCEV *Sc = SE->getSCEV(Ptr);
2078 
2079  unsigned AS = Ptr->getType()->getPointerAddressSpace();
2080  LLVMContext &Ctx = Loc->getContext();
2081 
2082  // Use this type for pointer arithmetic.
2083  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2084 
2085  if (SE->isLoopInvariant(Sc, TheLoop)) {
2086  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
2087  << *Ptr << "\n");
2088  // Ptr could be in the loop body. If so, expand a new one at the correct
2089  // location.
2090  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2091  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2092  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2093  : Ptr;
2094  // We must return a half-open range, which means incrementing Sc.
2095  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2096  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2097  return {NewPtr, NewPtrPlusOne};
2098  } else {
2099  Value *Start = nullptr, *End = nullptr;
2100  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2101  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2102  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2103  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
2104  << "\n");
2105  return {Start, End};
2106  }
2107 }
2108 
2109 /// Turns a collection of checks into a collection of expanded upper and
2110 /// lower bounds for both pointers in the check.
2111 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2112  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2113  Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2114  const RuntimePointerChecking &PtrRtChecking) {
2115  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2116 
2117  // Here we're relying on the SCEV Expander's cache to only emit code for the
2118  // same bounds once.
2119  transform(
2120  PointerChecks, std::back_inserter(ChecksWithBounds),
2121  [&](const RuntimePointerChecking::PointerCheck &Check) {
2122  PointerBounds
2123  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2124  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2125  return std::make_pair(First, Second);
2126  });
2127 
2128  return ChecksWithBounds;
2129 }
2130 
2131 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2132  Instruction *Loc,
2133  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2134  const {
2135  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2136  auto *SE = PSE->getSE();
2137  SCEVExpander Exp(*SE, DL, "induction");
2138  auto ExpandedChecks =
2139  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2140 
2141  LLVMContext &Ctx = Loc->getContext();
2142  Instruction *FirstInst = nullptr;
2143  IRBuilder<> ChkBuilder(Loc);
2144  // Our instructions might fold to a constant.
2145  Value *MemoryRuntimeCheck = nullptr;
2146 
2147  for (const auto &Check : ExpandedChecks) {
2148  const PointerBounds &A = Check.first, &B = Check.second;
2149  // Check if two pointers (A and B) conflict where conflict is computed as:
2150  // start(A) <= end(B) && start(B) <= end(A)
2151  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2152  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2153 
2154  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2155  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2156  "Trying to bounds check pointers with different address spaces");
2157 
2158  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2159  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2160 
2161  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2162  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2163  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2164  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2165 
2166  // [A|B].Start points to the first accessed byte under base [A|B].
2167  // [A|B].End points to the last accessed byte, plus one.
2168  // There is no conflict when the intervals are disjoint:
2169  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2170  //
2171  // bound0 = (B.Start < A.End)
2172  // bound1 = (A.Start < B.End)
2173  // IsConflict = bound0 & bound1
2174  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2175  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2176  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2177  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2178  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2179  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2180  if (MemoryRuntimeCheck) {
2181  IsConflict =
2182  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2183  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2184  }
2185  MemoryRuntimeCheck = IsConflict;
2186  }
2187 
2188  if (!MemoryRuntimeCheck)
2189  return std::make_pair(nullptr, nullptr);
2190 
2191  // We have to do this trickery because the IRBuilder might fold the check to a
2192  // constant expression in which case there is no Instruction anchored in
2193  // the block.
2194  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2195  ConstantInt::getTrue(Ctx));
2196  ChkBuilder.Insert(Check, "memcheck.conflict");
2197  FirstInst = getFirstInst(FirstInst, Check, Loc);
2198  return std::make_pair(FirstInst, Check);
2199 }
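The emitted conflict test is the standard half-open interval overlap check. A minimal sketch over plain integers (illustration only, with hypothetical byte offsets) shows the same predicate the builder materializes for each pair of pointer groups.

// Illustration only: the same disjointness predicate over plain integers.
// Intervals are half-open: [Start, End), where End is one past the last
// accessed byte, matching the comment above.
#include <cassert>
#include <cstdint>

static bool isConflict(uint64_t StartA, uint64_t EndA,
                       uint64_t StartB, uint64_t EndB) {
  bool Bound0 = StartB < EndA; // B.Start < A.End
  bool Bound1 = StartA < EndB; // A.Start < B.End
  return Bound0 && Bound1;
}

int main() {
  assert(!isConflict(0, 16, 16, 32)); // adjacent half-open ranges do not overlap
  assert(isConflict(0, 17, 16, 32));  // one shared byte -> runtime check fails
  return 0;
}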
2200 
2201 std::pair<Instruction *, Instruction *>
2202 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2203  if (!PtrRtChecking->Need)
2204  return std::make_pair(nullptr, nullptr);
2205 
2206  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2207 }
2208 
2209 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2210  Value *Ptr = nullptr;
2211  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2212  Ptr = LI->getPointerOperand();
2213  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2214  Ptr = SI->getPointerOperand();
2215  else
2216  return;
2217 
2218  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2219  if (!Stride)
2220  return;
2221 
2222  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2223  "versioning:");
2224  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2225 
2226  // Avoid adding the "Stride == 1" predicate when we know that
2227  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2228  // or zero iteration loop, as Trip-Count <= Stride == 1.
2229  //
2230  // TODO: We are currently not making a very informed decision on when it is
2231  // beneficial to apply stride versioning. It might make more sense that the
2232  // users of this analysis (such as the vectorizer) will trigger it, based on
2233  // their specific cost considerations; For example, in cases where stride
2234  // versioning does not help resolving memory accesses/dependences, the
2235  // vectorizer should evaluate the cost of the runtime test, and the benefit
2236  // of various possible stride specializations, considering the alternatives
2237  // of using gather/scatters (if available).
2238 
2239  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2240  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2241 
2242  // Match the types so we can compare the stride and the BETakenCount.
2243  // The Stride can be positive/negative, so we sign extend Stride;
2244  // the backedge-taken count is non-negative, so we zero extend BETakenCount.
2245  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2246  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2247  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2248  const SCEV *CastedStride = StrideExpr;
2249  const SCEV *CastedBECount = BETakenCount;
2250  ScalarEvolution *SE = PSE->getSE();
2251  if (BETypeSize >= StrideTypeSize)
2252  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2253  else
2254  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2255  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2256  // Since TripCount == BackEdgeTakenCount + 1, checking:
2257  // "Stride >= TripCount" is equivalent to checking:
2258  // Stride - BETakenCount > 0
2259  if (SE->isKnownPositive(StrideMinusBETaken)) {
2260  LLVM_DEBUG(
2261  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2262  "Stride==1 predicate will imply that the loop executes "
2263  "at most once.\n");
2264  return;
2265  }
2266  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2267 
2268  SymbolicStrides[Ptr] = Stride;
2269  StrideSet.insert(Stride);
2270 }
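The early exit above rests on a small identity: since TripCount == BackedgeTakenCount + 1 for integer counts, Stride >= TripCount holds exactly when Stride - BackedgeTakenCount > 0. A worked instance (illustration only) makes the "nothing to gain" case concrete.

// Illustration only: with BETakenCount = 2 the loop runs 3 iterations.
// Any stride of at least 3 satisfies Stride - BETakenCount > 0, so adding
// the "Stride == 1" predicate could only leave a loop of at most one
// iteration, and versioning is skipped.
#include <cassert>
#include <cstdint>

int main() {
  const int64_t BETakenCount = 2;
  const int64_t TripCount = BETakenCount + 1;
  for (int64_t Stride : {3, 8}) {
    assert(Stride >= TripCount);
    assert(Stride - BETakenCount > 0); // the form tested by the code above
  }
  return 0;
}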
2271 
2272 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2273  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2274  DominatorTree *DT, LoopInfo *LI)
2275  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2276  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2277  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2278  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2279  HasVariantStoreToLoopInvariantAddress(false) {
2280  if (canAnalyzeLoop())
2281  analyzeLoop(AA, LI, TLI, DT);
2282 }
2283 
2284 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2285  if (CanVecMem) {
2286  OS.indent(Depth) << "Memory dependences are safe";
2287  if (MaxSafeDepDistBytes != -1ULL)
2288  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2289  << " bytes";
2290  if (PtrRtChecking->Need)
2291  OS << " with run-time checks";
2292  OS << "\n";
2293  }
2294 
2295  if (Report)
2296  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2297 
2298  if (auto *Dependences = DepChecker->getDependences()) {
2299  OS.indent(Depth) << "Dependences:\n";
2300  for (auto &Dep : *Dependences) {
2301  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2302  OS << "\n";
2303  }
2304  } else
2305  OS.indent(Depth) << "Too many dependences, not recorded\n";
2306 
2307  // List the pairs of accesses that need run-time checks to prove independence.
2308  PtrRtChecking->print(OS, Depth);
2309  OS << "\n";
2310 
2311  OS.indent(Depth) << "Variant Store to invariant address was "
2312  << (HasVariantStoreToLoopInvariantAddress ? "" : "not ")
2313  << "found in loop.\n";
2314 
2315  OS.indent(Depth) << "SCEV assumptions:\n";
2316  PSE->getUnionPredicate().print(OS, Depth);
2317 
2318  OS << "\n";
2319 
2320  OS.indent(Depth) << "Expressions re-written:\n";
2321  PSE->print(OS, Depth);
2322 }
2323 
2324 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2325  auto &LAI = LoopAccessInfoMap[L];
2326 
2327  if (!LAI)
2328  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2329 
2330  return *LAI.get();
2331 }
2332 
2333 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2334  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2335 
2336  for (Loop *TopLevelLoop : *LI)
2337  for (Loop *L : depth_first(TopLevelLoop)) {
2338  OS.indent(2) << L->getHeader()->getName() << ":\n";
2339  auto &LAI = LAA.getInfo(L);
2340  LAI.print(OS, 4);
2341  }
2342 }
2343 
2344 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2345  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2346  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2347  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2348  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2349  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2350  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2351 
2352  return false;
2353 }
2354 
2355 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2356  AU.addRequired<ScalarEvolutionWrapperPass>();
2357  AU.addRequired<AAResultsWrapperPass>();
2358  AU.addRequired<DominatorTreeWrapperPass>();
2359  AU.addRequired<LoopInfoWrapperPass>();
2360 
2361  AU.setPreservesAll();
2362 }
2363 
2364 char LoopAccessLegacyAnalysis::ID = 0;
2365 static const char laa_name[] = "Loop Access Analysis";
2366 #define LAA_NAME "loop-accesses"
2367 
2368 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2369 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2370 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2371 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2372 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2373 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2374 
2375 AnalysisKey LoopAccessAnalysis::Key;
2376 
2377 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2378  LoopStandardAnalysisResults &AR) {
2379  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2380 }
2381 
2382 namespace llvm {
2383 
2384 Pass *createLAAPass() {
2385  return new LoopAccessLegacyAnalysis();
2386  }
2387 
2388 } // end namespace llvm