LLVM  7.0.0svn
LoopAccessAnalysis.cpp
1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The implementation of the loop memory dependence analysis that was
11 // originally developed for the loop vectorizer.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Analysis/LoopAccessAnalysis.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/DepthFirstIterator.h"
19 #include "llvm/ADT/EquivalenceClasses.h"
20 #include "llvm/ADT/PointerIntPair.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/iterator_range.h"
27 #include "llvm/Analysis/AliasAnalysis.h"
28 #include "llvm/Analysis/AliasSetTracker.h"
29 #include "llvm/Analysis/LoopAnalysisManager.h"
30 #include "llvm/Analysis/LoopInfo.h"
31 #include "llvm/Analysis/MemoryLocation.h"
32 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 #include "llvm/Analysis/ScalarEvolution.h"
34 #include "llvm/Analysis/ScalarEvolutionExpander.h"
35 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
36 #include "llvm/Analysis/TargetLibraryInfo.h"
37 #include "llvm/Analysis/ValueTracking.h"
38 #include "llvm/Analysis/VectorUtils.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstrTypes.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/raw_ostream.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstdlib>
66 #include <iterator>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-accesses"
73 
74 static cl::opt<unsigned, true>
75 VectorizationFactor("force-vector-width", cl::Hidden,
76  cl::desc("Sets the SIMD width. Zero is autoselect."),
77  cl::location(VectorizerParams::VectorizationFactor));
78 unsigned VectorizerParams::VectorizationFactor;
79 
80 static cl::opt<unsigned, true>
81 VectorizationInterleave("force-vector-interleave", cl::Hidden,
82  cl::desc("Sets the vectorization interleave count. "
83  "Zero is autoselect."),
84  cl::location(
85  VectorizerParams::VectorizationInterleave));
86 unsigned VectorizerParams::VectorizationInterleave;
87 
88 static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
89  "runtime-memory-check-threshold", cl::Hidden,
90  cl::desc("When performing memory disambiguation checks at runtime do not "
91  "generate more than this number of comparisons (default = 8)."),
92  cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
93 unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
94 
95 /// The maximum iterations used to merge memory checks
96 static cl::opt<unsigned> MemoryCheckMergeThreshold(
97  "memory-check-merge-threshold", cl::Hidden,
98  cl::desc("Maximum number of comparisons done when trying to merge "
99  "runtime memory checks. (default = 100)"),
100  cl::init(100));
101 
102 /// Maximum SIMD width.
103 const unsigned VectorizerParams::MaxVectorWidth = 64;
104 
105 /// We collect dependences up to this threshold.
106 static cl::opt<unsigned>
107  MaxDependences("max-dependences", cl::Hidden,
108  cl::desc("Maximum number of dependences collected by "
109  "loop-access analysis (default = 100)"),
110  cl::init(100));
111 
112 /// This enables versioning on the strides of symbolically striding memory
113 /// accesses in code like the following.
114 /// for (i = 0; i < N; ++i)
115 /// A[i * Stride1] += B[i * Stride2] ...
116 ///
117 /// Will be roughly translated to
118 /// if (Stride1 == 1 && Stride2 == 1) {
119 /// for (i = 0; i < N; i+=4)
120 /// A[i:i+3] += ...
121 /// } else
122 /// ...
123 static cl::opt<bool> EnableMemAccessVersioning(
124  "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125  cl::desc("Enable symbolic stride memory access versioning"));
126 
127 /// Enable store-to-load forwarding conflict detection. This option can
128 /// be disabled for correctness testing.
129 static cl::opt<bool> EnableForwardingConflictDetection(
130  "store-to-load-forwarding-conflict-detection", cl::Hidden,
131  cl::desc("Enable conflict detection in loop-access analysis"),
132  cl::init(true));
133 
134 bool VectorizerParams::isInterleaveForced() {
135  return ::VectorizationInterleave.getNumOccurrences() > 0;
136 }
137 
138 Value *llvm::stripIntegerCast(Value *V) {
139  if (auto *CI = dyn_cast<CastInst>(V))
140  if (CI->getOperand(0)->getType()->isIntegerTy())
141  return CI->getOperand(0);
142  return V;
143 }
144 
145 const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
146  const ValueToValueMap &PtrToStride,
147  Value *Ptr, Value *OrigPtr) {
148  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
149 
150  // If there is an entry in the map return the SCEV of the pointer with the
151  // symbolic stride replaced by one.
152  ValueToValueMap::const_iterator SI =
153  PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
154  if (SI != PtrToStride.end()) {
155  Value *StrideVal = SI->second;
156 
157  // Strip casts.
158  StrideVal = stripIntegerCast(StrideVal);
159 
160  ScalarEvolution *SE = PSE.getSE();
161  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
162  const auto *CT =
163  static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
164 
165  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
166  auto *Expr = PSE.getSCEV(Ptr);
167 
168  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
169  << " by: " << *Expr << "\n");
170  return Expr;
171  }
172 
173  // Otherwise, just return the SCEV of the original pointer.
174  return OrigSCEV;
175 }
176 
177 /// Calculate Start and End points of memory access.
178 /// Let's assume A is the first access and B is a memory access on N-th loop
179 /// iteration. Then B is calculated as:
180 /// B = A + Step*N .
181 /// Step value may be positive or negative.
182 /// N is a calculated back-edge taken count:
183 /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
184 /// Start and End points are calculated in the following way:
185 /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
186 /// where SizeOfElt is the size of single memory access in bytes.
187 ///
188 /// There is no conflict when the intervals are disjoint:
189 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
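/// For example (illustrative, not from the original comment): for a 4-byte
/// access A[i] with Step = 4 and N = 7, B = A + 4*7, so Start = UMIN(A, B) = A
/// and End = UMAX(A, B) + 4 = A + 32, i.e. the interval covers exactly the
/// bytes the access can touch.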
190 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
191  unsigned DepSetId, unsigned ASId,
192  const ValueToValueMap &Strides,
193  PredicatedScalarEvolution &PSE) {
194  // Get the stride replaced scev.
195  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
196  ScalarEvolution *SE = PSE.getSE();
197 
198  const SCEV *ScStart;
199  const SCEV *ScEnd;
200 
201  if (SE->isLoopInvariant(Sc, Lp))
202  ScStart = ScEnd = Sc;
203  else {
204  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
205  assert(AR && "Invalid addrec expression");
206  const SCEV *Ex = PSE.getBackedgeTakenCount();
207 
208  ScStart = AR->getStart();
209  ScEnd = AR->evaluateAtIteration(Ex, *SE);
210  const SCEV *Step = AR->getStepRecurrence(*SE);
211 
212  // For expressions with negative step, the upper bound is ScStart and the
213  // lower bound is ScEnd.
214  if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
215  if (CStep->getValue()->isNegative())
216  std::swap(ScStart, ScEnd);
217  } else {
218  // Fallback case: the step is not constant, but we can still
219  // get the upper and lower bounds of the interval by using min/max
220  // expressions.
221  ScStart = SE->getUMinExpr(ScStart, ScEnd);
222  ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
223  }
224  // Add the size of the pointed element to ScEnd.
225  unsigned EltSize =
226  Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
227  const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
228  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
229  }
230 
231  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
232 }
233 
234 SmallVector<RuntimePointerChecking::PointerCheck, 4>
235 RuntimePointerChecking::generateChecks() const {
236  SmallVector<PointerCheck, 4> Checks;
237 
238  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
239  for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
240  const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
241  const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
242 
243  if (needsChecking(CGI, CGJ))
244  Checks.push_back(std::make_pair(&CGI, &CGJ));
245  }
246  }
247  return Checks;
248 }
249 
250 void RuntimePointerChecking::generateChecks(
251  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
252  assert(Checks.empty() && "Checks is not empty");
253  groupChecks(DepCands, UseDependencies);
254  Checks = generateChecks();
255 }
256 
257 bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
258  const CheckingPtrGroup &N) const {
259  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
260  for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
261  if (needsChecking(M.Members[I], N.Members[J]))
262  return true;
263  return false;
264 }
265 
266 /// Compare \p I and \p J and return the minimum.
267 /// Return nullptr in case we couldn't find an answer.
268 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
269  ScalarEvolution *SE) {
270  const SCEV *Diff = SE->getMinusSCEV(J, I);
271  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
272 
273  if (!C)
274  return nullptr;
275  if (C->getValue()->isNegative())
276  return J;
277  return I;
278 }
279 
280 bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
281  const SCEV *Start = RtCheck.Pointers[Index].Start;
282  const SCEV *End = RtCheck.Pointers[Index].End;
283 
284  // Compare the starts and ends with the known minimum and maximum
285  // of this set. We need to know how we compare against the min/max
286  // of the set in order to be able to emit memchecks.
287  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
288  if (!Min0)
289  return false;
290 
291  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
292  if (!Min1)
293  return false;
294 
295  // Update the low bound expression if we've found a new min value.
296  if (Min0 == Start)
297  Low = Start;
298 
299  // Update the high bound expression if we've found a new max value.
300  if (Min1 != End)
301  High = End;
302 
303  Members.push_back(Index);
304  return true;
305 }
306 
307 void RuntimePointerChecking::groupChecks(
308  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
309  // We build the groups from dependency candidates equivalence classes
310  // because:
311  // - We know that pointers in the same equivalence class share
312  // the same underlying object and therefore there is a chance
313  // that we can compare pointers
314  // - We wouldn't be able to merge two pointers for which we need
315  // to emit a memcheck. The classes in DepCands are already
316  // conveniently built such that no two pointers in the same
317  // class need checking against each other.
318 
319  // We use the following (greedy) algorithm to construct the groups
320  // For every pointer in the equivalence class:
321  // For each existing group:
322  // - if the difference between this pointer and the min/max bounds
323  // of the group is a constant, then make the pointer part of the
324  // group and update the min/max bounds of that group as required.
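  // For example (illustrative): with 4-byte accesses to &A[0], &A[4] and
  // &A[100], all three fold into one group because each pointer's distance to
  // the group's current Low/High bounds is a compile-time constant; a pointer
  // whose distance to those bounds is symbolic starts a new group instead.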
325 
326  CheckingGroups.clear();
327 
328  // If we need to check two pointers to the same underlying object
329  // with a non-constant difference, we shouldn't perform any pointer
330  // grouping with those pointers. This is because we can easily get
331  // into cases where the resulting check would return false, even when
332  // the accesses are safe.
333  //
334  // The following example shows this:
335  // for (i = 0; i < 1000; ++i)
336  // a[5000 + i * m] = a[i] + a[i + 9000]
337  //
338  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
339  // (0, 10000) which is always false. However, if m is 1, there is no
340  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
341  // us to perform an accurate check in this case.
342  //
343  // The above case requires that we have an UnknownDependence between
344  // accesses to the same underlying object. This cannot happen unless
345  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
346  // is also false. In this case we will use the fallback path and create
347  // separate checking groups for all pointers.
348 
349  // If we don't have the dependency partitions, construct a new
350  // checking pointer group for each pointer. This is also required
351  // for correctness, because in this case we can have checking between
352  // pointers to the same underlying object.
353  if (!UseDependencies) {
354  for (unsigned I = 0; I < Pointers.size(); ++I)
355  CheckingGroups.push_back(CheckingPtrGroup(I, *this));
356  return;
357  }
358 
359  unsigned TotalComparisons = 0;
360 
361  DenseMap<Value *, unsigned> PositionMap;
362  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
363  PositionMap[Pointers[Index].PointerValue] = Index;
364 
365  // We need to keep track of what pointers we've already seen so we
366  // don't process them twice.
367  SmallSet<unsigned, 2> Seen;
368 
369  // Go through all equivalence classes, get the "pointer check groups"
370  // and add them to the overall solution. We use the order in which accesses
371  // appear in 'Pointers' to enforce determinism.
372  for (unsigned I = 0; I < Pointers.size(); ++I) {
373  // We've seen this pointer before, and therefore already processed
374  // its equivalence class.
375  if (Seen.count(I))
376  continue;
377 
378  MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
379  Pointers[I].IsWritePtr);
380 
381  SmallVector<CheckingPtrGroup, 2> Groups;
382  auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
383 
384  // Because DepCands is constructed by visiting accesses in the order in
385  // which they appear in alias sets (which is deterministic) and the
386  // iteration order within an equivalence class member is only dependent on
387  // the order in which unions and insertions are performed on the
388  // equivalence class, the iteration order is deterministic.
389  for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
390  MI != ME; ++MI) {
391  unsigned Pointer = PositionMap[MI->getPointer()];
392  bool Merged = false;
393  // Mark this pointer as seen.
394  Seen.insert(Pointer);
395 
396  // Go through all the existing sets and see if we can find one
397  // which can include this pointer.
398  for (CheckingPtrGroup &Group : Groups) {
399  // Don't perform more than a certain amount of comparisons.
400  // This should limit the cost of grouping the pointers to something
401  // reasonable. If we do end up hitting this threshold, the algorithm
402  // will create separate groups for all remaining pointers.
403  if (TotalComparisons > MemoryCheckMergeThreshold)
404  break;
405 
406  TotalComparisons++;
407 
408  if (Group.addPointer(Pointer)) {
409  Merged = true;
410  break;
411  }
412  }
413 
414  if (!Merged)
415  // We couldn't add this pointer to any existing set or the threshold
416  // for the number of comparisons has been reached. Create a new group
417  // to hold the current pointer.
418  Groups.push_back(CheckingPtrGroup(Pointer, *this));
419  }
420 
421  // We've computed the grouped checks for this partition.
422  // Save the results and continue with the next one.
423  std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
424  }
425 }
426 
427 bool RuntimePointerChecking::arePointersInSamePartition(
428  const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
429  unsigned PtrIdx2) {
430  return (PtrToPartition[PtrIdx1] != -1 &&
431  PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
432 }
433 
434 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
435  const PointerInfo &PointerI = Pointers[I];
436  const PointerInfo &PointerJ = Pointers[J];
437 
438  // No need to check if two readonly pointers intersect.
439  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
440  return false;
441 
442  // Only need to check pointers between two different dependency sets.
443  if (PointerI.DependencySetId == PointerJ.DependencySetId)
444  return false;
445 
446  // Only need to check pointers in the same alias set.
447  if (PointerI.AliasSetId != PointerJ.AliasSetId)
448  return false;
449 
450  return true;
451 }
452 
453 void RuntimePointerChecking::printChecks(
454  raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
455  unsigned Depth) const {
456  unsigned N = 0;
457  for (const auto &Check : Checks) {
458  const auto &First = Check.first->Members, &Second = Check.second->Members;
459 
460  OS.indent(Depth) << "Check " << N++ << ":\n";
461 
462  OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
463  for (unsigned K = 0; K < First.size(); ++K)
464  OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
465 
466  OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
467  for (unsigned K = 0; K < Second.size(); ++K)
468  OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
469  }
470 }
471 
472 void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
473 
474  OS.indent(Depth) << "Run-time memory checks:\n";
475  printChecks(OS, Checks, Depth);
476 
477  OS.indent(Depth) << "Grouped accesses:\n";
478  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
479  const auto &CG = CheckingGroups[I];
480 
481  OS.indent(Depth + 2) << "Group " << &CG << ":\n";
482  OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
483  << ")\n";
484  for (unsigned J = 0; J < CG.Members.size(); ++J) {
485  OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
486  << "\n";
487  }
488  }
489 }
490 
491 namespace {
492 
493 /// Analyses memory accesses in a loop.
494 ///
495 /// Checks whether run time pointer checks are needed and builds sets for data
496 /// dependence checking.
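///
/// Typical use (an illustrative sketch, not spelled out in the original
/// comment): register each pointer with addLoad()/addStore(), call
/// buildDependenceSets() to partition the accesses, then query
/// canCheckPtrAtRT() to decide whether run-time checks can cover the
/// remaining may-alias pairs.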
497 class AccessAnalysis {
498 public:
499  /// Read or write access location.
500  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
501  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
502 
503  AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
504  MemoryDepChecker::DepCandidates &DA,
505  PredicatedScalarEvolution &PSE)
506  : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
507  PSE(PSE) {}
508 
509  /// Register a load and whether it is only read from.
510  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
511  Value *Ptr = const_cast<Value*>(Loc.Ptr);
512  AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
513  Accesses.insert(MemAccessInfo(Ptr, false));
514  if (IsReadOnly)
515  ReadOnlyPtr.insert(Ptr);
516  }
517 
518  /// Register a store.
519  void addStore(MemoryLocation &Loc) {
520  Value *Ptr = const_cast<Value*>(Loc.Ptr);
521  AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
522  Accesses.insert(MemAccessInfo(Ptr, true));
523  }
524 
525  /// Check if we can emit a run-time no-alias check for \p Access.
526  ///
527  /// Returns true if we can emit a run-time no alias check for \p Access.
528  /// If we can check this access, this also adds it to a dependence set and
529  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
530  /// we will attempt to use additional run-time checks in order to get
531  /// the bounds of the pointer.
532  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
533  MemAccessInfo Access,
534  const ValueToValueMap &Strides,
535  DenseMap<Value *, unsigned> &DepSetId,
536  Loop *TheLoop, unsigned &RunningDepId,
537  unsigned ASId, bool ShouldCheckStride,
538  bool Assume);
539 
540  /// Check whether we can check the pointers at runtime for
541  /// non-intersection.
542  ///
543  /// Returns true if we need no check or if we do and we can generate them
544  /// (i.e. the pointers have computable bounds).
545  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
546  Loop *TheLoop, const ValueToValueMap &Strides,
547  bool ShouldCheckWrap = false);
548 
549  /// Goes over all memory accesses, checks whether a RT check is needed
550  /// and builds sets of dependent accesses.
551  void buildDependenceSets() {
552  processMemAccesses();
553  }
554 
555  /// Initial processing of memory accesses determined that we need to
556  /// perform dependency checking.
557  ///
558  /// Note that this can later be cleared if we retry memcheck analysis without
559  /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
560  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
561 
562  /// We decided that no dependence analysis would be used. Reset the state.
563  void resetDepChecks(MemoryDepChecker &DepChecker) {
564  CheckDeps.clear();
565  DepChecker.clearDependences();
566  }
567 
568  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
569 
570 private:
571  typedef SetVector<MemAccessInfo> PtrAccessSet;
572 
573  /// Go over all memory access and check whether runtime pointer checks
574  /// are needed and build sets of dependency check candidates.
575  void processMemAccesses();
576 
577  /// Set of all accesses.
578  PtrAccessSet Accesses;
579 
580  const DataLayout &DL;
581 
582  /// List of accesses that need a further dependence check.
583  MemAccessInfoList CheckDeps;
584 
585  /// Set of pointers that are read only.
586  SmallPtrSet<Value*, 16> ReadOnlyPtr;
587 
588  /// An alias set tracker to partition the access set by underlying object and
589  /// intrinsic property (such as TBAA metadata).
590  AliasSetTracker AST;
591 
592  LoopInfo *LI;
593 
594  /// Sets of potentially dependent accesses - members of one set share an
595  /// underlying pointer. The set "CheckDeps" identifies which sets really need a
596  /// dependence check.
597  MemoryDepChecker::DepCandidates &DepCands;
598 
599  /// Initial processing of memory accesses determined that we may need
600  /// to add memchecks. Perform the analysis to determine the necessary checks.
601  ///
602  /// Note that, this is different from isDependencyCheckNeeded. When we retry
603  /// memcheck analysis without dependency checking
604  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
605  /// while this remains set if we have potentially dependent accesses.
606  bool IsRTCheckAnalysisNeeded;
607 
608  /// The SCEV predicate containing all the SCEV-related assumptions.
609  PredicatedScalarEvolution &PSE;
610 };
611 
612 } // end anonymous namespace
613 
614 /// Check whether a pointer can participate in a runtime bounds check.
615 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
616 /// by adding run-time checks (overflow checks) if necessary.
617 static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
618  const ValueToValueMap &Strides, Value *Ptr,
619  Loop *L, bool Assume) {
620  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
621 
622  // The bounds for a loop-invariant pointer are trivial.
623  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
624  return true;
625 
626  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
627 
628  if (!AR && Assume)
629  AR = PSE.getAsAddRec(Ptr);
630 
631  if (!AR)
632  return false;
633 
634  return AR->isAffine();
635 }
636 
637 /// Check whether a pointer address cannot wrap.
638 static bool isNoWrap(PredicatedScalarEvolution &PSE,
639  const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
640  const SCEV *PtrScev = PSE.getSCEV(Ptr);
641  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
642  return true;
643 
644  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
645  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
646  return true;
647 
648  return false;
649 }
650 
651 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
652  MemAccessInfo Access,
653  const ValueToValueMap &StridesMap,
654  DenseMap<Value *, unsigned> &DepSetId,
655  Loop *TheLoop, unsigned &RunningDepId,
656  unsigned ASId, bool ShouldCheckWrap,
657  bool Assume) {
658  Value *Ptr = Access.getPointer();
659 
660  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
661  return false;
662 
663  // When we run after a failing dependency check we have to make sure
664  // we don't have wrapping pointers.
665  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
666  auto *Expr = PSE.getSCEV(Ptr);
667  if (!Assume || !isa<SCEVAddRecExpr>(Expr))
668  return false;
669  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
670  }
671 
672  // The id of the dependence set.
673  unsigned DepId;
674 
675  if (isDependencyCheckNeeded()) {
676  Value *Leader = DepCands.getLeaderValue(Access).getPointer();
677  unsigned &LeaderId = DepSetId[Leader];
678  if (!LeaderId)
679  LeaderId = RunningDepId++;
680  DepId = LeaderId;
681  } else
682  // Each access has its own dependence set.
683  DepId = RunningDepId++;
684 
685  bool IsWrite = Access.getInt();
686  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
687  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
688 
689  return true;
690  }
691 
692 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
693  ScalarEvolution *SE, Loop *TheLoop,
694  const ValueToValueMap &StridesMap,
695  bool ShouldCheckWrap) {
696  // Find pointers with computable bounds. We are going to use this information
697  // to place a runtime bound check.
698  bool CanDoRT = true;
699 
700  bool NeedRTCheck = false;
701  if (!IsRTCheckAnalysisNeeded) return true;
702 
703  bool IsDepCheckNeeded = isDependencyCheckNeeded();
704 
705  // We assign a consecutive id to accesses from different alias sets.
706  // Accesses between different groups don't need to be checked.
707  unsigned ASId = 1;
708  for (auto &AS : AST) {
709  int NumReadPtrChecks = 0;
710  int NumWritePtrChecks = 0;
711  bool CanDoAliasSetRT = true;
712 
713  // We assign consecutive ids to accesses from different dependence sets.
714  // Accesses within the same set don't need a runtime check.
715  unsigned RunningDepId = 1;
716  DenseMap<Value *, unsigned> DepSetId;
717 
718  SmallVector<MemAccessInfo, 4> Retries;
719 
720  for (auto A : AS) {
721  Value *Ptr = A.getValue();
722  bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
723  MemAccessInfo Access(Ptr, IsWrite);
724 
725  if (IsWrite)
726  ++NumWritePtrChecks;
727  else
728  ++NumReadPtrChecks;
729 
730  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
731  RunningDepId, ASId, ShouldCheckWrap, false)) {
732  LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
733  Retries.push_back(Access);
734  CanDoAliasSetRT = false;
735  }
736  }
737 
738  // If we have at least two writes or one write and a read then we need to
739  // check them. But there is no need for checks if there is only one
740  // dependence set for this alias set.
741  //
742  // Note that this function computes CanDoRT and NeedRTCheck independently.
743  // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
744  // for which we couldn't find the bounds but we don't actually need to emit
745  // any checks so it does not matter.
746  bool NeedsAliasSetRTCheck = false;
747  if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
748  NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
749  (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
750 
751  // We need to perform run-time alias checks, but some pointers had bounds
752  // that couldn't be checked.
753  if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
754  // Reset the CanDoSetRt flag and retry all accesses that have failed.
755  // We know that we need these checks, so we can now be more aggressive
756  // and add further checks if required (overflow checks).
757  CanDoAliasSetRT = true;
758  for (auto Access : Retries)
759  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
760  TheLoop, RunningDepId, ASId,
761  ShouldCheckWrap, /*Assume=*/true)) {
762  CanDoAliasSetRT = false;
763  break;
764  }
765  }
766 
767  CanDoRT &= CanDoAliasSetRT;
768  NeedRTCheck |= NeedsAliasSetRTCheck;
769  ++ASId;
770  }
771 
772  // If the pointers that we would use for the bounds comparison have different
773  // address spaces, assume the values aren't directly comparable, so we can't
774  // use them for the runtime check. We also have to assume they could
775  // overlap. In the future there should be metadata for whether address spaces
776  // are disjoint.
777  unsigned NumPointers = RtCheck.Pointers.size();
778  for (unsigned i = 0; i < NumPointers; ++i) {
779  for (unsigned j = i + 1; j < NumPointers; ++j) {
780  // Only need to check pointers between two different dependency sets.
781  if (RtCheck.Pointers[i].DependencySetId ==
782  RtCheck.Pointers[j].DependencySetId)
783  continue;
784  // Only need to check pointers in the same alias set.
785  if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
786  continue;
787 
788  Value *PtrI = RtCheck.Pointers[i].PointerValue;
789  Value *PtrJ = RtCheck.Pointers[j].PointerValue;
790 
791  unsigned ASi = PtrI->getType()->getPointerAddressSpace();
792  unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
793  if (ASi != ASj) {
794  LLVM_DEBUG(
795  dbgs() << "LAA: Runtime check would require comparison between"
796  " different address spaces\n");
797  return false;
798  }
799  }
800  }
801 
802  if (NeedRTCheck && CanDoRT)
803  RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
804 
805  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
806  << " pointer comparisons.\n");
807 
808  RtCheck.Need = NeedRTCheck;
809 
810  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
811  if (!CanDoRTIfNeeded)
812  RtCheck.reset();
813  return CanDoRTIfNeeded;
814 }
815 
816 void AccessAnalysis::processMemAccesses() {
817  // We process the set twice: first we process read-write pointers, last we
818  // process read-only pointers. This allows us to skip dependence tests for
819  // read-only pointers.
820 
821  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
822  LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
823  LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
824  LLVM_DEBUG({
825  for (auto A : Accesses)
826  dbgs() << "\t" << *A.getPointer() << " (" <<
827  (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
828  "read-only" : "read")) << ")\n";
829  });
830 
831  // The AliasSetTracker has nicely partitioned our pointers by metadata
832  // compatibility and potential for underlying-object overlap. As a result, we
833  // only need to check for potential pointer dependencies within each alias
834  // set.
835  for (auto &AS : AST) {
836  // Note that both the alias-set tracker and the alias sets themselves use
837  // linked lists internally and so the iteration order here is deterministic
838  // (matching the original instruction order within each set).
839 
840  bool SetHasWrite = false;
841 
842  // Map of pointers to last access encountered.
843  typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
844  UnderlyingObjToAccessMap ObjToLastAccess;
845 
846  // Set of access to check after all writes have been processed.
847  PtrAccessSet DeferredAccesses;
848 
849  // Iterate over each alias set twice, once to process read/write pointers,
850  // and then to process read-only pointers.
851  for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
852  bool UseDeferred = SetIteration > 0;
853  PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
854 
855  for (auto AV : AS) {
856  Value *Ptr = AV.getValue();
857 
858  // For a single memory access in AliasSetTracker, Accesses may contain
859  // both read and write, and they both need to be handled for CheckDeps.
860  for (auto AC : S) {
861  if (AC.getPointer() != Ptr)
862  continue;
863 
864  bool IsWrite = AC.getInt();
865 
866  // If we're using the deferred access set, then it contains only
867  // reads.
868  bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
869  if (UseDeferred && !IsReadOnlyPtr)
870  continue;
871  // Otherwise, the pointer must be in the PtrAccessSet, either as a
872  // read or a write.
873  assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
874  S.count(MemAccessInfo(Ptr, false))) &&
875  "Alias-set pointer not in the access set?");
876 
877  MemAccessInfo Access(Ptr, IsWrite);
878  DepCands.insert(Access);
879 
880  // Memorize read-only pointers for later processing and skip them in
881  // the first round (they need to be checked after we have seen all
882  // write pointers). Note: we also mark pointers that are not
883  // consecutive as "read-only" pointers (so that we check
884  // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
885  if (!UseDeferred && IsReadOnlyPtr) {
886  DeferredAccesses.insert(Access);
887  continue;
888  }
889 
890  // If this is a write - check other reads and writes for conflicts. If
891  // this is a read only check other writes for conflicts (but only if
892  // there is no other write to the ptr - this is an optimization to
893  // catch "a[i] = a[i] + " without having to do a dependence check).
894  if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
895  CheckDeps.push_back(Access);
896  IsRTCheckAnalysisNeeded = true;
897  }
898 
899  if (IsWrite)
900  SetHasWrite = true;
901 
902  // Create sets of pointers connected by a shared alias set and
903  // underlying object.
904  typedef SmallVector<Value *, 16> ValueVector;
905  ValueVector TempObjects;
906 
907  GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
908  LLVM_DEBUG(dbgs()
909  << "Underlying objects for pointer " << *Ptr << "\n");
910  for (Value *UnderlyingObj : TempObjects) {
911  // nullptr never aliases, don't join sets for pointers that have "null"
912  // in their UnderlyingObjects list.
913  if (isa<ConstantPointerNull>(UnderlyingObj))
914  continue;
915 
916  UnderlyingObjToAccessMap::iterator Prev =
917  ObjToLastAccess.find(UnderlyingObj);
918  if (Prev != ObjToLastAccess.end())
919  DepCands.unionSets(Access, Prev->second);
920 
921  ObjToLastAccess[UnderlyingObj] = Access;
922  LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
923  }
924  }
925  }
926  }
927  }
928 }
929 
930 static bool isInBoundsGep(Value *Ptr) {
931  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
932  return GEP->isInBounds();
933  return false;
934 }
935 
936 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
937 /// i.e. monotonically increasing/decreasing.
938 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
939  PredicatedScalarEvolution &PSE, const Loop *L) {
940  // FIXME: This should probably only return true for NUW.
941  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
942  return true;
943 
944  // Scalar evolution does not propagate the non-wrapping flags to values that
945  // are derived from a non-wrapping induction variable because non-wrapping
946  // could be flow-sensitive.
947  //
948  // Look through the potentially overflowing instruction to try to prove
949  // non-wrapping for the *specific* value of Ptr.
950 
951  // The arithmetic implied by an inbounds GEP can't overflow.
952  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
953  if (!GEP || !GEP->isInBounds())
954  return false;
955 
956  // Make sure there is only one non-const index and analyze that.
957  Value *NonConstIndex = nullptr;
958  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
959  if (!isa<ConstantInt>(Index)) {
960  if (NonConstIndex)
961  return false;
962  NonConstIndex = Index;
963  }
964  if (!NonConstIndex)
965  // The recurrence is on the pointer, ignore for now.
966  return false;
967 
968  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
969  // AddRec using a NSW operation.
970  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
971  if (OBO->hasNoSignedWrap() &&
972  // Assume a constant for the other operand so that the AddRec can be
973  // easily found.
974  isa<ConstantInt>(OBO->getOperand(1))) {
975  auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
976 
977  if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
978  return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
979  }
980 
981  return false;
982 }
983 
984 /// Check whether the access through \p Ptr has a constant stride.
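/// For example (illustrative): with 4-byte elements, A[i] yields a stride of
/// 1 and A[2*i] a stride of 2, while A[B[i]] has no affine AddRec and the
/// function returns 0.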
985 int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
986  const Loop *Lp, const ValueToValueMap &StridesMap,
987  bool Assume, bool ShouldCheckWrap) {
988  Type *Ty = Ptr->getType();
989  assert(Ty->isPointerTy() && "Unexpected non-ptr");
990 
991  // Make sure that the pointer does not point to aggregate types.
992  auto *PtrTy = cast<PointerType>(Ty);
993  if (PtrTy->getElementType()->isAggregateType()) {
994  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
995  << *Ptr << "\n");
996  return 0;
997  }
998 
999  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1000 
1001  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1002  if (Assume && !AR)
1003  AR = PSE.getAsAddRec(Ptr);
1004 
1005  if (!AR) {
1006  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1007  << " SCEV: " << *PtrScev << "\n");
1008  return 0;
1009  }
1010 
1011  // The access function must stride over the innermost loop.
1012  if (Lp != AR->getLoop()) {
1013  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1014  << *Ptr << " SCEV: " << *AR << "\n");
1015  return 0;
1016  }
1017 
1018  // The address calculation must not wrap. Otherwise, a dependence could be
1019  // inverted.
1020  // An inbounds getelementptr that is an AddRec with a unit stride
1021  // cannot wrap per definition. The unit stride requirement is checked later.
1022  // A getelementptr without an inbounds attribute and unit stride would have
1023  // to access the pointer value "0" which is undefined behavior in address
1024  // space 0, therefore we can also vectorize this case.
1025  bool IsInBoundsGEP = isInBoundsGep(Ptr);
1026  bool IsNoWrapAddRec = !ShouldCheckWrap ||
1027  PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
1028  isNoWrapAddRec(Ptr, AR, PSE, Lp);
1029  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
1030  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
1031  if (Assume) {
1032  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1033  IsNoWrapAddRec = true;
1034  LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1035  << "LAA: Pointer: " << *Ptr << "\n"
1036  << "LAA: SCEV: " << *AR << "\n"
1037  << "LAA: Added an overflow assumption\n");
1038  } else {
1039  LLVM_DEBUG(
1040  dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1041  << *Ptr << " SCEV: " << *AR << "\n");
1042  return 0;
1043  }
1044  }
1045 
1046  // Check the step is constant.
1047  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1048 
1049  // Calculate the pointer stride and check if it is constant.
1050  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1051  if (!C) {
1052  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1053  << " SCEV: " << *AR << "\n");
1054  return 0;
1055  }
1056 
1057  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1058  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1059  const APInt &APStepVal = C->getAPInt();
1060 
1061  // Huge step value - give up.
1062  if (APStepVal.getBitWidth() > 64)
1063  return 0;
1064 
1065  int64_t StepVal = APStepVal.getSExtValue();
1066 
1067  // Strided access.
1068  int64_t Stride = StepVal / Size;
1069  int64_t Rem = StepVal % Size;
1070  if (Rem)
1071  return 0;
1072 
1073  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1074  // know we can't "wrap around the address space". In case of address space
1075  // zero we know that this won't happen without triggering undefined behavior.
1076  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
1077  Stride != 1 && Stride != -1) {
1078  if (Assume) {
1079  // We can avoid this case by adding a run-time check.
1080  LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1081  << "inbounds or in address space 0 may wrap:\n"
1082  << "LAA: Pointer: " << *Ptr << "\n"
1083  << "LAA: SCEV: " << *AR << "\n"
1084  << "LAA: Added an overflow assumption\n");
1085  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1086  } else
1087  return 0;
1088  }
1089 
1090  return Stride;
1091 }
1092 
1093 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
1094  ScalarEvolution &SE,
1095  SmallVectorImpl<unsigned> &SortedIndices) {
1096  assert(llvm::all_of(
1097  VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1098  "Expected list of pointer operands.");
1099  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
1100  OffValPairs.reserve(VL.size());
1101 
1102  // Walk over the pointers, and map each of them to an offset relative to
1103  // first pointer in the array.
1104  Value *Ptr0 = VL[0];
1105  const SCEV *Scev0 = SE.getSCEV(Ptr0);
1106  Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1107 
1109  for (auto *Ptr : VL) {
1110  // TODO: Outline this code as a special, more time consuming, version of
1111  // computeConstantDifference() function.
1112  if (Ptr->getType()->getPointerAddressSpace() !=
1113  Ptr0->getType()->getPointerAddressSpace())
1114  return false;
1115  // If a pointer refers to a different underlying object, bail - the
1116  // pointers are by definition incomparable.
1117  Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1118  if (CurrObj != Obj0)
1119  return false;
1120 
1121  const SCEV *Scev = SE.getSCEV(Ptr);
1122  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
1123  // The pointers may not have a constant offset from each other, or SCEV
1124  // may just not be smart enough to figure out they do. Regardless,
1125  // there's nothing we can do.
1126  if (!Diff)
1127  return false;
1128 
1129  // Check if the pointer with the same offset is found.
1130  int64_t Offset = Diff->getAPInt().getSExtValue();
1131  if (!Offsets.insert(Offset).second)
1132  return false;
1133  OffValPairs.emplace_back(Offset, Ptr);
1134  }
1135  SortedIndices.clear();
1136  SortedIndices.resize(VL.size());
1137  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
1138 
1139  // Sort the memory accesses and keep the order of their uses in UseOrder.
1140  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
1141  [&OffValPairs](unsigned Left, unsigned Right) {
1142  return OffValPairs[Left].first < OffValPairs[Right].first;
1143  });
1144 
1145  // Check if the order is consecutive already.
1146  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
1147  return I == SortedIndices[I];
1148  }))
1149  SortedIndices.clear();
1150 
1151  return true;
1152 }
1153 
1154 /// Take the address space operand from the Load/Store instruction.
1155 /// Returns -1 if this is not a valid Load/Store instruction.
1156 static unsigned getAddressSpaceOperand(Value *I) {
1157  if (LoadInst *L = dyn_cast<LoadInst>(I))
1158  return L->getPointerAddressSpace();
1159  if (StoreInst *S = dyn_cast<StoreInst>(I))
1160  return S->getPointerAddressSpace();
1161  return -1;
1162 }
1163 
1164 /// Returns true if the memory operations \p A and \p B are consecutive.
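/// For example (illustrative): a load of A[i] followed by a load of A[i+1]
/// (same element type, same address space) is consecutive, while A[i] and
/// A[i+2] are not.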
1165 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1166  ScalarEvolution &SE, bool CheckType) {
1167  Value *PtrA = getLoadStorePointerOperand(A);
1168  Value *PtrB = getLoadStorePointerOperand(B);
1169  unsigned ASA = getAddressSpaceOperand(A);
1170  unsigned ASB = getAddressSpaceOperand(B);
1171 
1172  // Check that the address spaces match and that the pointers are valid.
1173  if (!PtrA || !PtrB || (ASA != ASB))
1174  return false;
1175 
1176  // Make sure that A and B are different pointers.
1177  if (PtrA == PtrB)
1178  return false;
1179 
1180  // Make sure that A and B have the same type if required.
1181  if (CheckType && PtrA->getType() != PtrB->getType())
1182  return false;
1183 
1184  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1185  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1186  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
1187 
1188  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1189  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1190  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1191 
1192  // OffsetDelta = OffsetB - OffsetA;
1193  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1194  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1195  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1196  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
1197  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
1198  // Check if they are based on the same pointer. That makes the offsets
1199  // sufficient.
1200  if (PtrA == PtrB)
1201  return OffsetDelta == Size;
1202 
1203  // Compute the necessary base pointer delta to have the necessary final delta
1204  // equal to the size.
1205  // BaseDelta = Size - OffsetDelta;
1206  const SCEV *SizeSCEV = SE.getConstant(Size);
1207  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1208 
1209  // Otherwise compute the distance with SCEV between the base pointers.
1210  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1211  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1212  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1213  return X == PtrSCEVB;
1214 }
1215 
1216 bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1217  switch (Type) {
1218  case NoDep:
1219  case Forward:
1220  case BackwardVectorizable:
1221  return true;
1222 
1223  case Unknown:
1224  case ForwardButPreventsForwarding:
1225  case Backward:
1226  case BackwardVectorizableButPreventsForwarding:
1227  return false;
1228  }
1229  llvm_unreachable("unexpected DepType!");
1230 }
1231 
1232 bool MemoryDepChecker::Dependence::isBackward() const {
1233  switch (Type) {
1234  case NoDep:
1235  case Forward:
1236  case ForwardButPreventsForwarding:
1237  case Unknown:
1238  return false;
1239 
1240  case BackwardVectorizable:
1241  case Backward:
1242  case BackwardVectorizableButPreventsForwarding:
1243  return true;
1244  }
1245  llvm_unreachable("unexpected DepType!");
1246 }
1247 
1248 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1249  return isBackward() || Type == Unknown;
1250 }
1251 
1252 bool MemoryDepChecker::Dependence::isForward() const {
1253  switch (Type) {
1254  case Forward:
1255  case ForwardButPreventsForwarding:
1256  return true;
1257 
1258  case NoDep:
1259  case Unknown:
1260  case BackwardVectorizable:
1261  case Backward:
1262  case BackwardVectorizableButPreventsForwarding:
1263  return false;
1264  }
1265  llvm_unreachable("unexpected DepType!");
1266 }
1267 
1268 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1269  uint64_t TypeByteSize) {
1270  // If loads occur at a distance that is not a multiple of a feasible vector
1271  // factor, store-load forwarding does not take place.
1272  // Positive dependences might cause trouble because vectorizing them might
1273  // prevent store-load forwarding, making vectorized code run a lot slower.
1274  // a[i] = a[i-3] ^ a[i-8];
1275  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1276  // hence on your typical architecture store-load forwarding does not take
1277  // place. Vectorizing in such cases does not make sense.
1278  // Store-load forwarding distance.
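  // For example (illustrative): with 4-byte elements and Distance = 12
  // (a[i] = a[i-3]), the first candidate VF of 8 bytes is misaligned
  // (12 % 8 != 0) while 12 / 8 is well inside the forwarding window, so the
  // maximum VF without forwarding issues drops below two elements and the
  // dependence is reported as problematic.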
1279 
1280  // After this many iterations store-to-load forwarding conflicts should not
1281  // cause any slowdowns.
1282  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1283  // Maximum vector factor.
1284  uint64_t MaxVFWithoutSLForwardIssues = std::min(
1285  VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1286 
1287  // Compute the smallest VF at which the store and load would be misaligned.
1288  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1289  VF *= 2) {
1290  // If the number of vector iterations between the store and the load is
1291  // small, we could incur conflicts.
1292  if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1293  MaxVFWithoutSLForwardIssues = (VF >>= 1);
1294  break;
1295  }
1296  }
1297 
1298  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1299  LLVM_DEBUG(
1300  dbgs() << "LAA: Distance " << Distance
1301  << " that could cause a store-load forwarding conflict\n");
1302  return true;
1303  }
1304 
1305  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1306  MaxVFWithoutSLForwardIssues !=
1307  VectorizerParams::MaxVectorWidth * TypeByteSize)
1308  MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1309  return false;
1310 }
1311 
1312 /// Given a non-constant (unknown) dependence-distance \p Dist between two
1313 /// memory accesses, that have the same stride whose absolute value is given
1314 /// in \p Stride, and that have the same type size \p TypeByteSize,
1315 /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1316 /// possible to prove statically that the dependence distance is larger
1317 /// than the range that the accesses will travel through the execution of
1318 /// the loop. If so, return true; false otherwise. This is useful for
1319 /// example in loops such as the following (PR31098):
1320 /// for (i = 0; i < D; ++i) {
1321 /// = out[i];
1322 /// out[i+D] =
1323 /// }
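/// For example (illustrative): with 4-byte elements and Stride 1, the loop
/// above walks 4 * BackedgeTakenCount = 4*(D-1) bytes, while the distance
/// between out[i] and out[i+D] is 4*D bytes, so the dependence can be
/// discarded without a runtime test.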
1324 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1325  const SCEV &BackedgeTakenCount,
1326  const SCEV &Dist, uint64_t Stride,
1327  uint64_t TypeByteSize) {
1328 
1329  // If we can prove that
1330  // (**) |Dist| > BackedgeTakenCount * Step
1331  // where Step is the absolute stride of the memory accesses in bytes,
1332  // then there is no dependence.
1333  //
1334  // Rationale:
1335  // We basically want to check if the absolute distance (|Dist/Step|)
1336  // is >= the loop iteration count (or > BackedgeTakenCount).
1337  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1338  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1339  // that the dependence distance is >= VF; This is checked elsewhere.
1340  // But in some cases we can prune unknown dependence distances early, and
1341  // even before selecting the VF, and without a runtime test, by comparing
1342  // the distance against the loop iteration count. Since the vectorized code
1343  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1344  // also guarantees that distance >= VF.
1345  //
1346  const uint64_t ByteStride = Stride * TypeByteSize;
1347  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1348  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1349 
1350  const SCEV *CastedDist = &Dist;
1351  const SCEV *CastedProduct = Product;
1352  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1353  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1354 
1355  // The dependence distance can be positive/negative, so we sign extend Dist;
1356  // The multiplication of the absolute stride in bytes and the
1357  // backedgeTakenCount is non-negative, so we zero extend Product.
1358  if (DistTypeSize > ProductTypeSize)
1359  CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1360  else
1361  CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1362 
1363  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1364  // (If so, then we have proven (**) because |Dist| >= Dist)
1365  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1366  if (SE.isKnownPositive(Minus))
1367  return true;
1368 
1369  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1370  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1371  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1372  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1373  if (SE.isKnownPositive(Minus))
1374  return true;
1375 
1376  return false;
1377 }
1378 
1379 /// Check the dependence for two accesses with the same stride \p Stride.
1380 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1381 /// bytes.
1382 ///
1383 /// \returns true if they are independent.
1384 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1385  uint64_t TypeByteSize) {
1386  assert(Stride > 1 && "The stride must be greater than 1");
1387  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1388  assert(Distance > 0 && "The distance must be non-zero");
1389 
1390  // Skip if the distance is not a multiple of the type byte size.
1391  if (Distance % TypeByteSize)
1392  return false;
1393 
1394  uint64_t ScaledDist = Distance / TypeByteSize;
1395 
1396  // No dependence if the scaled distance is not a multiple of the stride.
1397  // E.g.
1398  // for (i = 0; i < 1024 ; i += 4)
1399  // A[i+2] = A[i] + 1;
1400  //
1401  // Two accesses in memory (scaled distance is 2, stride is 4):
1402  // | A[0] | | | | A[4] | | | |
1403  // | | | A[2] | | | | A[6] | |
1404  //
1405  // E.g.
1406  // for (i = 0; i < 1024 ; i += 3)
1407  // A[i+4] = A[i] + 1;
1408  //
1409  // Two accesses in memory (scaled distance is 4, stride is 3):
1410  // | A[0] | | | A[3] | | | A[6] | | |
1411  // | | | | | A[4] | | | A[7] | |
1412  return ScaledDist % Stride;
1413 }
1414 
1415 MemoryDepChecker::Dependence::DepType
1416 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1417  const MemAccessInfo &B, unsigned BIdx,
1418  const ValueToValueMap &Strides) {
1419  assert (AIdx < BIdx && "Must pass arguments in program order");
1420 
1421  Value *APtr = A.getPointer();
1422  Value *BPtr = B.getPointer();
1423  bool AIsWrite = A.getInt();
1424  bool BIsWrite = B.getInt();
1425 
1426  // Two reads are independent.
1427  if (!AIsWrite && !BIsWrite)
1428  return Dependence::NoDep;
1429 
1430  // We cannot check pointers in different address spaces.
1431  if (APtr->getType()->getPointerAddressSpace() !=
1432  BPtr->getType()->getPointerAddressSpace())
1433  return Dependence::Unknown;
1434 
1435  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1436  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1437 
1438  const SCEV *Src = PSE.getSCEV(APtr);
1439  const SCEV *Sink = PSE.getSCEV(BPtr);
1440 
1441  // If the induction step is negative we have to invert source and sink of the
1442  // dependence.
1443  if (StrideAPtr < 0) {
1444  std::swap(APtr, BPtr);
1445  std::swap(Src, Sink);
1446  std::swap(AIsWrite, BIsWrite);
1447  std::swap(AIdx, BIdx);
1448  std::swap(StrideAPtr, StrideBPtr);
1449  }
1450 
1451  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1452 
1453  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1454  << "(Induction step: " << StrideAPtr << ")\n");
1455  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1456  << *InstMap[BIdx] << ": " << *Dist << "\n");
1457 
1458  // Need accesses with constant stride. We don't want to vectorize
1459  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1460  // the address space.
1461  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1462  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1463  return Dependence::Unknown;
1464  }
1465 
1466  Type *ATy = APtr->getType()->getPointerElementType();
1467  Type *BTy = BPtr->getType()->getPointerElementType();
1468  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1469  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1470  uint64_t Stride = std::abs(StrideAPtr);
1471  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1472  if (!C) {
1473  if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1474  isSafeDependenceDistance(DL, *(PSE.getSE()),
1475  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1476  TypeByteSize))
1477  return Dependence::NoDep;
1478 
1479  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1480  ShouldRetryWithRuntimeCheck = true;
1481  return Dependence::Unknown;
1482  }
1483 
1484  const APInt &Val = C->getAPInt();
1485  int64_t Distance = Val.getSExtValue();
1486 
1487  // Attempt to prove strided accesses independent.
1488  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1489  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1490  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1491  return Dependence::NoDep;
1492  }
1493 
1494  // Negative distances are not plausible dependencies.
1495  if (Val.isNegative()) {
1496  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1497  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1498  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1499  ATy != BTy)) {
1500  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1501  return Dependence::ForwardButPreventsForwarding;
1502  }
1503 
1504  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1505  return Dependence::Forward;
1506  }
1507 
1508  // Write to the same location with the same size.
1509  // Could be improved to assert type sizes are the same (i32 == float, etc).
1510  if (Val == 0) {
1511  if (ATy == BTy)
1512  return Dependence::Forward;
1513  LLVM_DEBUG(
1514  dbgs() << "LAA: Zero dependence difference but different types\n");
1515  return Dependence::Unknown;
1516  }
1517 
1518  assert(Val.isStrictlyPositive() && "Expect a positive value");
1519 
1520  if (ATy != BTy) {
1521  LLVM_DEBUG(
1522  dbgs()
1523  << "LAA: ReadWrite-Write positive dependency with different types\n");
1524  return Dependence::Unknown;
1525  }
1526 
1527  // Bail out early if passed-in parameters make vectorization not feasible.
1528  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1529  VectorizerParams::VectorizationFactor : 1);
1530  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1531  VectorizerParams::VectorizationInterleave : 1);
1532  // The minimum number of iterations for a vectorized/unrolled version.
1533  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1534 
1535  // It's not vectorizable if the distance is smaller than the minimum distance
1536  // needed for a vectorized/unrolled version. Vectorizing one iteration in
1537  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1538  // TypeByteSize (no need to add the last gap distance).
1539  //
1540  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1541  // foo(int *A) {
1542  // int *B = (int *)((char *)A + 14);
1543  // for (i = 0 ; i < 1024 ; i += 2)
1544  // B[i] = A[i] + 1;
1545  // }
1546  //
1547  // Two accesses in memory (stride is 2):
1548  // | A[0] | | A[2] | | A[4] | | A[6] | |
1549  // | B[0] | | B[2] | | B[4] |
1550  //
1551  // Distance needed for vectorizing all iterations except the last:
1552  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1553  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1554  //
1555  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1556  // 12, which is less than the distance of 14.
1557  //
1558  // If MinNumIter is 4 (say, if a user forces the vectorization factor to be 4),
1559  // the minimum distance needed is 28, which is greater than the distance. It is
1560  // not safe to do vectorization.
1561  uint64_t MinDistanceNeeded =
1562  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1563  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1564  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1565  << Distance << '\n');
1566  return Dependence::Backward;
1567  }
1568 
1569  // Unsafe if the minimum distance needed is greater than max safe distance.
1570  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1571  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1572  << MinDistanceNeeded << " size in bytes");
1573  return Dependence::Backward;
1574  }
1575 
1576  // Positive distance bigger than max vectorization factor.
1577  // FIXME: Should use max factor instead of max distance in bytes, which
1578  // cannot handle different types.
1579  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1580  // void foo (int *A, char *B) {
1581  // for (unsigned i = 0; i < 1024; i++) {
1582  // A[i+2] = A[i] + 1;
1583  // B[i+2] = B[i] + 1;
1584  // }
1585  // }
1586  //
1587  // This case is currently unsafe according to the max safe distance. If we
1588  // analyze the two accesses on array B, the max safe dependence distance
1589  // is 2. Then we analyze the accesses on array A, where the minimum distance
1590  // needed is 8, which is greater than 2, so vectorization is rejected. But
1591  // actually both A and B could be vectorized with a factor of 2.
1592  MaxSafeDepDistBytes =
1593  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1594 
1595  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1596  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1597  couldPreventStoreLoadForward(Distance, TypeByteSize))
1598  return Dependence::BackwardVectorizableButPreventsForwarding;
1599 
1600  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1601  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1602  << " with max VF = " << MaxVF << '\n');
1603  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1604  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1605  return Dependence::BackwardVectorizable;
1606 }
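As a worked check of the arithmetic in the comments above, the following standalone sketch (hypothetical helper names, not part of this file) evaluates the minimum-distance formula for the example of 4-byte ints, stride 2, and a dependence distance of 14 bytes.

#include <cstdint>
#include <cstdio>

// Sketch of the rule used above: a positive constant distance is only safe if
// it covers TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize bytes.
static uint64_t minDistanceNeeded(uint64_t TypeByteSize, uint64_t Stride,
                                  uint64_t MinNumIter) {
  return TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
}

int main() {
  const uint64_t Distance = 14; // B = (int *)((char *)A + 14) in the example
  for (uint64_t MinNumIter : {2, 4})
    std::printf("MinNumIter=%llu needs %llu bytes -> %s\n",
                (unsigned long long)MinNumIter,
                (unsigned long long)minDistanceNeeded(4, 2, MinNumIter),
                minDistanceNeeded(4, 2, MinNumIter) <= Distance
                    ? "backward-vectorizable"
                    : "not vectorizable at this factor");
  return 0;
}

Running it reproduces the two cases from the comment: 12 bytes needed for MinNumIter = 2 (safe, since 12 <= 14) and 28 bytes for MinNumIter = 4 (unsafe).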
1607 
1608 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1609                                    MemAccessInfoList &CheckDeps,
1610  const ValueToValueMap &Strides) {
1611 
1612  MaxSafeDepDistBytes = -1;
1613  SmallPtrSet<MemAccessInfo, 8> Visited;
1614  for (MemAccessInfo CurAccess : CheckDeps) {
1615  if (Visited.count(CurAccess))
1616  continue;
1617 
1618  // Get the relevant memory access set.
1619  EquivalenceClasses<MemAccessInfo>::iterator I =
1620  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1621 
1622  // Check accesses within this set.
1623  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1624  AccessSets.member_begin(I);
1625  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1626  AccessSets.member_end();
1627 
1628  // Check every access pair.
1629  while (AI != AE) {
1630  Visited.insert(*AI);
1631  EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
1632  while (OI != AE) {
1633  // Check every accessing instruction pair in program order.
1634  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1635  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1636  for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1637  I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1638  auto A = std::make_pair(&*AI, *I1);
1639  auto B = std::make_pair(&*OI, *I2);
1640 
1641  assert(*I1 != *I2);
1642  if (*I1 > *I2)
1643  std::swap(A, B);
1644 
1645  Dependence::DepType Type =
1646  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1647  SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1648 
1649  // Gather dependences unless we accumulated MaxDependences
1650  // dependences. In that case return as soon as we find the first
1651  // unsafe dependence. This puts a limit on this quadratic
1652  // algorithm.
1653  if (RecordDependences) {
1654  if (Type != Dependence::NoDep)
1655  Dependences.push_back(Dependence(A.second, B.second, Type));
1656 
1657  if (Dependences.size() >= MaxDependences) {
1658  RecordDependences = false;
1659  Dependences.clear();
1660  LLVM_DEBUG(dbgs()
1661  << "Too many dependences, stopped recording\n");
1662  }
1663  }
1664  if (!RecordDependences && !SafeForVectorization)
1665  return false;
1666  }
1667  ++OI;
1668  }
1669  AI++;
1670  }
1671  }
1672 
1673  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1674  return SafeForVectorization;
1675 }
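The pair walk above is quadratic in the number of accesses inside an equivalence class, which is why the MaxDependences cap exists. The following simplified sketch (hypothetical data, not LAA code) shows the ordering rule it applies: every instruction pair is queried once, with the lexically earlier instruction as the source.

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  // Program-order indices of the instructions behind two accesses that share
  // an underlying object (one equivalence class).
  std::vector<unsigned> AccessA = {0, 4};
  std::vector<unsigned> AccessB = {2, 6};
  for (unsigned I1 : AccessA)
    for (unsigned I2 : AccessB) {
      unsigned Src = I1, Dst = I2;
      if (Src > Dst)
        std::swap(Src, Dst); // dependence is always queried earlier -> later
      std::printf("isDependent(inst#%u, inst#%u)\n", Src, Dst);
    }
  return 0;
}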
1676 
1677 SmallVector<Instruction *, 4>
1678 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1679  MemAccessInfo Access(Ptr, isWrite);
1680  auto &IndexVector = Accesses.find(Access)->second;
1681 
1682  SmallVector<Instruction *, 4> Insts;
1683  transform(IndexVector,
1684  std::back_inserter(Insts),
1685  [&](unsigned Idx) { return this->InstMap[Idx]; });
1686  return Insts;
1687 }
1688 
1689 const char *MemoryDepChecker::Dependence::DepName[] = {
1690  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1691  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1692 
1693 void MemoryDepChecker::Dependence::print(
1694  raw_ostream &OS, unsigned Depth,
1695  const SmallVectorImpl<Instruction *> &Instrs) const {
1696  OS.indent(Depth) << DepName[Type] << ":\n";
1697  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1698  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1699 }
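For illustration only (hypothetical IR, not taken from this file), a recorded backward dependence printed by this routine looks like:

  Backward:
      %load = load i32, i32* %gep.a, align 4 ->
      store i32 %add, i32* %gep.b, align 4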
1700 
1701 bool LoopAccessInfo::canAnalyzeLoop() {
1702  // We need to have a loop header.
1703  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1704  << TheLoop->getHeader()->getParent()->getName() << ": "
1705  << TheLoop->getHeader()->getName() << '\n');
1706 
1707  // We can only analyze innermost loops.
1708  if (!TheLoop->empty()) {
1709  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1710  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1711  return false;
1712  }
1713 
1714  // We must have a single backedge.
1715  if (TheLoop->getNumBackEdges() != 1) {
1716  LLVM_DEBUG(
1717  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1718  recordAnalysis("CFGNotUnderstood")
1719  << "loop control flow is not understood by analyzer";
1720  return false;
1721  }
1722 
1723  // We must have a single exiting block.
1724  if (!TheLoop->getExitingBlock()) {
1725  LLVM_DEBUG(
1726  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1727  recordAnalysis("CFGNotUnderstood")
1728  << "loop control flow is not understood by analyzer";
1729  return false;
1730  }
1731 
1732  // We only handle bottom-tested loops, i.e. loops in which the condition is
1733  // checked at the end of each iteration. With that we can assume that all
1734  // instructions in the loop are executed the same number of times.
1735  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1736  LLVM_DEBUG(
1737  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1738  recordAnalysis("CFGNotUnderstood")
1739  << "loop control flow is not understood by analyzer";
1740  return false;
1741  }
1742 
1743  // ScalarEvolution needs to be able to find the exit count.
1744  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1745  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1746  recordAnalysis("CantComputeNumberOfIterations")
1747  << "could not determine number of loop iterations";
1748  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1749  return false;
1750  }
1751 
1752  return true;
1753 }
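The checks above describe a canonical, bottom-tested innermost loop. For illustration (a hypothetical example, not from this file), a loop of the following shape passes all of them once the usual loop-simplify canonicalization has run: it is innermost, has a single backedge, its only exiting block is the latch, and its trip count is computable by ScalarEvolution.

// Hypothetical C input. An early "break", a second exit edge, or a bound that
// ScalarEvolution cannot compute would instead fail one of the checks above.
void saxpy(float *x, float *y, float a, int n) {
  for (int i = 0; i < n; ++i)
    y[i] = a * x[i] + y[i];
}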
1754 
1755 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1756  const TargetLibraryInfo *TLI,
1757  DominatorTree *DT) {
1758  typedef SmallPtrSet<Value*, 16> ValueSet;
1759 
1760  // Holds the Load and Store instructions.
1761  SmallVector<LoadInst *, 16> Loads;
1762  SmallVector<StoreInst *, 16> Stores;
1763 
1764  // Holds all the different accesses in the loop.
1765  unsigned NumReads = 0;
1766  unsigned NumReadWrites = 0;
1767 
1768  PtrRtChecking->Pointers.clear();
1769  PtrRtChecking->Need = false;
1770 
1771  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1772 
1773  // For each block.
1774  for (BasicBlock *BB : TheLoop->blocks()) {
1775  // Scan the BB and collect legal loads and stores.
1776  for (Instruction &I : *BB) {
1777  // If this is a load, save it. If this instruction can read from memory
1778  // but is not a load, then we quit. Notice that we don't handle function
1779  // calls that read or write.
1780  if (I.mayReadFromMemory()) {
1781  // Many math library functions read the rounding mode. We will only
1782  // vectorize a loop if it contains known function calls that don't set
1783  // the flag. Therefore, it is safe to ignore this read from memory.
1784  auto *Call = dyn_cast<CallInst>(&I);
1785  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1786  continue;
1787 
1788  // If the function has an explicit vectorized counterpart, we can safely
1789  // assume that it can be vectorized.
1790  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1791  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1792  continue;
1793 
1794  auto *Ld = dyn_cast<LoadInst>(&I);
1795  if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1796  recordAnalysis("NonSimpleLoad", Ld)
1797  << "read with atomic ordering or volatile read";
1798  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1799  CanVecMem = false;
1800  return;
1801  }
1802  NumLoads++;
1803  Loads.push_back(Ld);
1804  DepChecker->addAccess(Ld);
1805  if (EnableMemAccessVersioning)
1806  collectStridedAccess(Ld);
1807  continue;
1808  }
1809 
1810  // Save 'store' instructions. Abort if other instructions write to memory.
1811  if (I.mayWriteToMemory()) {
1812  auto *St = dyn_cast<StoreInst>(&I);
1813  if (!St) {
1814  recordAnalysis("CantVectorizeInstruction", St)
1815  << "instruction cannot be vectorized";
1816  CanVecMem = false;
1817  return;
1818  }
1819  if (!St->isSimple() && !IsAnnotatedParallel) {
1820  recordAnalysis("NonSimpleStore", St)
1821  << "write with atomic ordering or volatile write";
1822  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1823  CanVecMem = false;
1824  return;
1825  }
1826  NumStores++;
1827  Stores.push_back(St);
1828  DepChecker->addAccess(St);
1829  if (EnableMemAccessVersioning)
1830  collectStridedAccess(St);
1831  }
1832  } // Next instr.
1833  } // Next block.
1834 
1835  // Now we have two lists that hold the loads and the stores.
1836  // Next, we find the pointers that they use.
1837 
1838  // Check if we see any stores. If there are no stores, then we don't
1839  // care if the pointers are *restrict*.
1840  if (!Stores.size()) {
1841  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1842  CanVecMem = true;
1843  return;
1844  }
1845 
1846  MemoryDepChecker::DepCandidates DependentAccesses;
1847  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1848  AA, LI, DependentAccesses, *PSE);
1849 
1850  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1851  // multiple times on the same object. If the ptr is accessed twice, once
1852  // for read and once for write, it will only appear once (on the write
1853  // list). This is okay, since we are going to check for conflicts between
1854  // writes and between reads and writes, but not between reads and reads.
1855  ValueSet Seen;
1856 
1857  for (StoreInst *ST : Stores) {
1858  Value *Ptr = ST->getPointerOperand();
1859  // Check for store to loop invariant address.
1860  StoreToLoopInvariantAddress |= isUniform(Ptr);
1861  // If we did *not* see this pointer before, insert it to the read-write
1862  // list. At this phase it is only a 'write' list.
1863  if (Seen.insert(Ptr).second) {
1864  ++NumReadWrites;
1865 
1866  MemoryLocation Loc = MemoryLocation::get(ST);
1867  // The TBAA metadata could have a control dependency on the predication
1868  // condition, so we cannot rely on it when determining whether or not we
1869  // need runtime pointer checks.
1870  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1871  Loc.AATags.TBAA = nullptr;
1872 
1873  Accesses.addStore(Loc);
1874  }
1875  }
1876 
1877  if (IsAnnotatedParallel) {
1878  LLVM_DEBUG(
1879  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1880  << "checks.\n");
1881  CanVecMem = true;
1882  return;
1883  }
1884 
1885  for (LoadInst *LD : Loads) {
1886  Value *Ptr = LD->getPointerOperand();
1887  // If we did *not* see this pointer before, insert it to the
1888  // read list. If we *did* see it before, then it is already in
1889  // the read-write list. This allows us to vectorize expressions
1890  // such as A[i] += x, because the address of A[i] is a read-write
1891  // pointer. This only works if the index of A[i] is consecutive.
1892  // If the address of i is unknown (for example A[B[i]]) then we may
1893  // read a few words, modify, and write a few words, and some of the
1894  // words may be written to the same address.
1895  bool IsReadOnlyPtr = false;
1896  if (Seen.insert(Ptr).second ||
1897  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1898  ++NumReads;
1899  IsReadOnlyPtr = true;
1900  }
1901 
1902  MemoryLocation Loc = MemoryLocation::get(LD);
1903  // The TBAA metadata could have a control dependency on the predication
1904  // condition, so we cannot rely on it when determining whether or not we
1905  // need runtime pointer checks.
1906  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1907  Loc.AATags.TBAA = nullptr;
1908 
1909  Accesses.addLoad(Loc, IsReadOnlyPtr);
1910  }
1911 
1912  // If we write (or read-write) to a single destination and there are no
1913  // other reads in this loop, then it is safe to vectorize.
1914  if (NumReadWrites == 1 && NumReads == 0) {
1915  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1916  CanVecMem = true;
1917  return;
1918  }
1919 
1920  // Build dependence sets and check whether we need a runtime pointer bounds
1921  // check.
1922  Accesses.buildDependenceSets();
1923 
1924  // Find pointers with computable bounds. We are going to use this information
1925  // to place a runtime bound check.
1926  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1927  TheLoop, SymbolicStrides);
1928  if (!CanDoRTIfNeeded) {
1929  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1930  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1931  << "the array bounds.\n");
1932  CanVecMem = false;
1933  return;
1934  }
1935 
1936  LLVM_DEBUG(
1937  dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1938 
1939  CanVecMem = true;
1940  if (Accesses.isDependencyCheckNeeded()) {
1941  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1942  CanVecMem = DepChecker->areDepsSafe(
1943  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1944  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1945 
1946  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1947  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1948 
1949  // Clear the dependency checks. We assume they are not needed.
1950  Accesses.resetDepChecks(*DepChecker);
1951 
1952  PtrRtChecking->reset();
1953  PtrRtChecking->Need = true;
1954 
1955  auto *SE = PSE->getSE();
1956  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1957  SymbolicStrides, true);
1958 
1959  // Check that we found the bounds for the pointer.
1960  if (!CanDoRTIfNeeded) {
1961  recordAnalysis("CantCheckMemDepsAtRunTime")
1962  << "cannot check memory dependencies at runtime";
1963  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1964  CanVecMem = false;
1965  return;
1966  }
1967 
1968  CanVecMem = true;
1969  }
1970  }
1971 
1972  if (CanVecMem)
1973  LLVM_DEBUG(
1974  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
1975  << (PtrRtChecking->Need ? "" : " don't")
1976  << " need runtime memory checks.\n");
1977  else {
1978  recordAnalysis("UnsafeMemDep")
1979  << "unsafe dependent memory operations in loop. Use "
1980  "#pragma loop distribute(enable) to allow loop distribution "
1981  "to attempt to isolate the offending operations into a separate "
1982  "loop";
1983  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
1984  }
1985 }
1986 
1987 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
1988  DominatorTree *DT) {
1989  assert(TheLoop->contains(BB) && "Unknown block used");
1990 
1991  // Blocks that do not dominate the latch need predication.
1992  BasicBlock* Latch = TheLoop->getLoopLatch();
1993  return !DT->dominates(BB, Latch);
1994 }
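A small illustration (hypothetical source, not from this file): the conditional block below does not dominate the latch, so this predicate returns true for it, while the unconditional tail of the body needs no predication.

void clampAndBump(int *a, int n) {
  for (int i = 0; i < n; ++i) {
    if (a[i] < 0) // executes only on some iterations -> needs predication
      a[i] = 0;
    a[i] += 1;    // dominates the latch -> no predication needed
  }
}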
1995 
1996 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
1997  Instruction *I) {
1998  assert(!Report && "Multiple reports generated");
1999 
2000  Value *CodeRegion = TheLoop->getHeader();
2001  DebugLoc DL = TheLoop->getStartLoc();
2002 
2003  if (I) {
2004  CodeRegion = I->getParent();
2005  // If there is no debug location attached to the instruction, fall back to
2006  // using the loop's start location.
2007  if (I->getDebugLoc())
2008  DL = I->getDebugLoc();
2009  }
2010 
2011  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2012  CodeRegion);
2013  return *Report;
2014 }
2015 
2016 bool LoopAccessInfo::isUniform(Value *V) const {
2017  auto *SE = PSE->getSE();
2018  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2019  // never considered uniform.
2020  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2021  // trivially loop-invariant FP values to be considered uniform.
2022  if (!SE->isSCEVable(V->getType()))
2023  return false;
2024  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2025 }
2026 
2027 // FIXME: this function is currently a duplicate of the one in
2028 // LoopVectorize.cpp.
2029 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2030  Instruction *Loc) {
2031  if (FirstInst)
2032  return FirstInst;
2033  if (Instruction *I = dyn_cast<Instruction>(V))
2034  return I->getParent() == Loc->getParent() ? I : nullptr;
2035  return nullptr;
2036 }
2037 
2038 namespace {
2039 
2040 /// IR Values for the lower and upper bounds of a pointer evolution. We
2041 /// need to use value-handles because SCEV expansion can invalidate previously
2042 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
2043 /// a previous one.
2044 struct PointerBounds {
2045  TrackingVH<Value> Start;
2046  TrackingVH<Value> End;
2047 };
2048 
2049 } // end anonymous namespace
2050 
2051 /// Expand code for the lower and upper bound of the pointer group \p CG
2052 /// in \p TheLoop. \return the values for the bounds.
2053 static PointerBounds
2054 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2055  Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2056  const RuntimePointerChecking &PtrRtChecking) {
2057  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2058  const SCEV *Sc = SE->getSCEV(Ptr);
2059 
2060  unsigned AS = Ptr->getType()->getPointerAddressSpace();
2061  LLVMContext &Ctx = Loc->getContext();
2062 
2063  // Use this type for pointer arithmetic.
2064  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2065 
2066  if (SE->isLoopInvariant(Sc, TheLoop)) {
2067  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
2068  << *Ptr << "\n");
2069  // Ptr could be in the loop body. If so, expand a new one at the correct
2070  // location.
2071  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2072  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2073  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2074  : Ptr;
2075  // We must return a half-open range, which means incrementing Sc.
2076  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2077  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2078  return {NewPtr, NewPtrPlusOne};
2079  } else {
2080  Value *Start = nullptr, *End = nullptr;
2081  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2082  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2083  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2084  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
2085  << "\n");
2086  return {Start, End};
2087  }
2088 }
2089 
2090 /// Turns a collection of checks into a collection of expanded upper and
2091 /// lower bounds for both pointers in the check.
2092 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2093  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2094  Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2095  const RuntimePointerChecking &PtrRtChecking) {
2096  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2097 
2098  // Here we're relying on the SCEV Expander's cache to only emit code for the
2099  // same bounds once.
2100  transform(
2101  PointerChecks, std::back_inserter(ChecksWithBounds),
2102  [&](const RuntimePointerChecking::PointerCheck &Check) {
2103  PointerBounds
2104  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2105  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2106  return std::make_pair(First, Second);
2107  });
2108 
2109  return ChecksWithBounds;
2110 }
2111 
2112 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2113  Instruction *Loc,
2114  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2115  const {
2116  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2117  auto *SE = PSE->getSE();
2118  SCEVExpander Exp(*SE, DL, "induction");
2119  auto ExpandedChecks =
2120  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2121 
2122  LLVMContext &Ctx = Loc->getContext();
2123  Instruction *FirstInst = nullptr;
2124  IRBuilder<> ChkBuilder(Loc);
2125  // Our instructions might fold to a constant.
2126  Value *MemoryRuntimeCheck = nullptr;
2127 
2128  for (const auto &Check : ExpandedChecks) {
2129  const PointerBounds &A = Check.first, &B = Check.second;
2130  // Check if two pointers (A and B) conflict where conflict is computed as:
2131  // start(A) <= end(B) && start(B) <= end(A)
2132  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2133  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2134 
2135  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2136  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2137  "Trying to bounds check pointers with different address spaces");
2138 
2139  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2140  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2141 
2142  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2143  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2144  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2145  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2146 
2147  // [A|B].Start points to the first accessed byte under base [A|B].
2148  // [A|B].End points to the last accessed byte, plus one.
2149  // There is no conflict when the intervals are disjoint:
2150  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2151  //
2152  // bound0 = (B.Start < A.End)
2153  // bound1 = (A.Start < B.End)
2154  // IsConflict = bound0 & bound1
2155  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2156  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2157  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2158  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2159  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2160  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2161  if (MemoryRuntimeCheck) {
2162  IsConflict =
2163  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2164  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2165  }
2166  MemoryRuntimeCheck = IsConflict;
2167  }
2168 
2169  if (!MemoryRuntimeCheck)
2170  return std::make_pair(nullptr, nullptr);
2171 
2172  // We have to do this trickery because the IRBuilder might fold the check to a
2173  // constant expression, in which case there is no Instruction anchored in
2174  // the block.
2175  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2176  ConstantInt::getTrue(Ctx));
2177  ChkBuilder.Insert(Check, "memcheck.conflict");
2178  FirstInst = getFirstInst(FirstInst, Check, Loc);
2179  return std::make_pair(FirstInst, Check);
2180 }
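Conceptually (a minimal sketch, not the emitted IR), the per-pair test built above reduces to an overlap check on the half-open byte ranges [Start, End), with the per-pair results or-reduced just like the "conflict.rdx" chain:

#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical stand-in for the expanded PointerBounds of one pointer group.
struct Range { uintptr_t Start, End; };

// bound0 = (B.Start < A.End), bound1 = (A.Start < B.End); overlap iff both hold.
static bool mayConflict(const Range &A, const Range &B) {
  return B.Start < A.End && A.Start < B.End;
}

// Or-reduce the per-pair results, mirroring the accumulated MemoryRuntimeCheck.
static bool anyConflict(const std::vector<std::pair<Range, Range>> &Checks) {
  bool Conflict = false;
  for (const auto &C : Checks)
    Conflict |= mayConflict(C.first, C.second);
  return Conflict;
}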
2181 
2182 std::pair<Instruction *, Instruction *>
2183 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2184  if (!PtrRtChecking->Need)
2185  return std::make_pair(nullptr, nullptr);
2186 
2187  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2188 }
2189 
2190 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2191  Value *Ptr = nullptr;
2192  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2193  Ptr = LI->getPointerOperand();
2194  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2195  Ptr = SI->getPointerOperand();
2196  else
2197  return;
2198 
2199  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2200  if (!Stride)
2201  return;
2202 
2203  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2204  "versioning:");
2205  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2206 
2207  // Avoid adding the "Stride == 1" predicate when we know that
2208  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2209  // or zero iteration loop, as Trip-Count <= Stride == 1.
2210  //
2211  // TODO: We are currently not making a very informed decision on when it is
2212  // beneficial to apply stride versioning. It might make more sense that the
2213  // users of this analysis (such as the vectorizer) will trigger it, based on
2214  // their specific cost considerations; For example, in cases where stride
2215  // versioning does not help resolving memory accesses/dependences, the
2216  // vectorizer should evaluate the cost of the runtime test, and the benefit
2217  // of various possible stride specializations, considering the alternatives
2218  // of using gather/scatters (if available).
2219 
2220  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2221  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2222 
2223  // Match the types so we can compare the stride and the BETakenCount.
2224  // The Stride can be positive/negative, so we sign extend Stride;
2225  // The backedge-taken count is non-negative, so we zero extend BETakenCount.
2226  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2227  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2228  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2229  const SCEV *CastedStride = StrideExpr;
2230  const SCEV *CastedBECount = BETakenCount;
2231  ScalarEvolution *SE = PSE->getSE();
2232  if (BETypeSize >= StrideTypeSize)
2233  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2234  else
2235  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2236  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2237  // Since TripCount == BackEdgeTakenCount + 1, checking:
2238  // "Stride >= TripCount" is equivalent to checking:
2239  // Stride - BETakenCount > 0
2240  if (SE->isKnownPositive(StrideMinusBETaken)) {
2241  LLVM_DEBUG(
2242  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2243  "Stride==1 predicate will imply that the loop executes "
2244  "at most once.\n");
2245  return;
2246  }
2247  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2248 
2249  SymbolicStrides[Ptr] = Stride;
2250  StrideSet.insert(Stride);
2251 }
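A case the early exit above is meant to catch (hypothetical example, not from this file): when the symbolic stride equals the trip count, Stride - BackedgeTakenCount is n - (n - 1) = 1, which SCEV knows is positive, so a "Stride == 1" predicate would only describe a loop that executes at most once and versioning is skipped.

// Hypothetical C input: walking the first column of an n x n row-major matrix.
// The store has symbolic stride n while the loop runs n iterations, so the
// stride can never be 1 unless the loop executes at most one iteration.
void zeroFirstColumn(int *m, long n) {
  for (long i = 0; i < n; ++i)
    m[i * n] = 0;
}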
2252 
2253 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2254  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2255  DominatorTree *DT, LoopInfo *LI)
2256  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2257  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2258  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2259  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2260  StoreToLoopInvariantAddress(false) {
2261  if (canAnalyzeLoop())
2262  analyzeLoop(AA, LI, TLI, DT);
2263 }
2264 
2265 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2266  if (CanVecMem) {
2267  OS.indent(Depth) << "Memory dependences are safe";
2268  if (MaxSafeDepDistBytes != -1ULL)
2269  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2270  << " bytes";
2271  if (PtrRtChecking->Need)
2272  OS << " with run-time checks";
2273  OS << "\n";
2274  }
2275 
2276  if (Report)
2277  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2278 
2279  if (auto *Dependences = DepChecker->getDependences()) {
2280  OS.indent(Depth) << "Dependences:\n";
2281  for (auto &Dep : *Dependences) {
2282  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2283  OS << "\n";
2284  }
2285  } else
2286  OS.indent(Depth) << "Too many dependences, not recorded\n";
2287 
2288  // List the pairs of accesses that need run-time checks to prove independence.
2289  PtrRtChecking->print(OS, Depth);
2290  OS << "\n";
2291 
2292  OS.indent(Depth) << "Store to invariant address was "
2293  << (StoreToLoopInvariantAddress ? "" : "not ")
2294  << "found in loop.\n";
2295 
2296  OS.indent(Depth) << "SCEV assumptions:\n";
2297  PSE->getUnionPredicate().print(OS, Depth);
2298 
2299  OS << "\n";
2300 
2301  OS.indent(Depth) << "Expressions re-written:\n";
2302  PSE->print(OS, Depth);
2303 }
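Assuming a stock build of this LLVM release, the report printed here can be requested from opt through the legacy pass registered under LAA_NAME, for example:

  opt -loop-accesses -analyze input.ll

The output lists the recorded dependences, the run-time pointer checks, and the SCEV assumptions in the order shown above.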
2304 
2305 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2306  auto &LAI = LoopAccessInfoMap[L];
2307 
2308  if (!LAI)
2309  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2310 
2311  return *LAI.get();
2312 }
2313 
2314 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2315  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2316 
2317  for (Loop *TopLevelLoop : *LI)
2318  for (Loop *L : depth_first(TopLevelLoop)) {
2319  OS.indent(2) << L->getHeader()->getName() << ":\n";
2320  auto &LAI = LAA.getInfo(L);
2321  LAI.print(OS, 4);
2322  }
2323 }
2324 
2325 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2326  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2327  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2328  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2329  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2330  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2331  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2332 
2333  return false;
2334 }
2335 
2336 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2337  AU.addRequired<ScalarEvolutionWrapperPass>();
2338  AU.addRequired<AAResultsWrapperPass>();
2339  AU.addRequired<DominatorTreeWrapperPass>();
2340  AU.addRequired<LoopInfoWrapperPass>();
2341 
2342  AU.setPreservesAll();
2343 }
2344 
2345 char LoopAccessLegacyAnalysis::ID = 0;
2346 static const char laa_name[] = "Loop Access Analysis";
2347 #define LAA_NAME "loop-accesses"
2348 
2355 
2356 AnalysisKey LoopAccessAnalysis::Key;
2357 
2358 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2359                                        LoopStandardAnalysisResults &AR) {
2360  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2361 }
2362 
2363 namespace llvm {
2364 
2365  Pass *createLAAPass() {
2366  return new LoopAccessLegacyAnalysis();
2367  }
2368 
2369 } // end namespace llvm