//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// \brief The maximum iterations used to merge memory checks
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// \brief We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///        A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// \brief Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI != PtrToStride.end()) {
    Value *StrideVal = SI->second;

    // Strip casts.
    StrideVal = stripIntegerCast(StrideVal);

    ScalarEvolution *SE = PSE.getSE();
    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
    const auto *CT =
        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

    PSE.addPredicate(*SE->getEqualPredicate(U, CT));
    auto *Expr = PSE.getSCEV(Ptr);

    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
                 << "\n");
    return Expr;
  }

  // Otherwise, just return the SCEV of the original pointer.
  return OrigSCEV;
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access in the N-th
/// loop iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///   N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
///   Start = UMIN(A, B); End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of a single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
///   NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
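///
/// For illustration only (an informal example, not part of the original
/// documentation): for an access A[i] with 4-byte elements, A = %a, a
/// constant Step of 4 and N = 99, this computes
///   Start = UMIN(%a, %a + 396)     = %a
///   End   = UMAX(%a, %a + 396) + 4 = %a + 400
/// i.e. the checked interval is [%a, %a + 400). With Step = -4 the same
/// formulas apply with the roles of the two end points swapped.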
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp))
    ScStart = ScEnd = Sc;
  else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
    // Add the size of the pointed element to ScEnd.
    unsigned EltSize =
        Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
    const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
  }

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}

SmallVector<RuntimePointerChecking::PointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<PointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
                                           const CheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
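///
/// For illustration only (informal): with I = %a and J = %a + 4 the
/// difference J - I is the constant 4, which is non-negative, so I is
/// returned; when the difference does not fold to a SCEVConstant the
/// comparison is inconclusive and nullptr is returned.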
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
  const SCEV *Start = RtCheck.Pointers[Index].Start;
  const SCEV *End = RtCheck.Pointers[Index].End;

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.
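  //
  // For illustration only (informal): with 4-byte elements and pointers
  // %a, %a + 8 and %a + 1024 in one equivalence class, %a opens a group
  // with bounds [%a, %a + 4); %a + 8 is merged because its distance to the
  // group bounds is a constant, widening the group to [%a, %a + 12); and
  // %a + 1024 likewise fits, widening the group to [%a, %a + 1028). A
  // pointer at a symbolic offset such as %a + %n would start its own group.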

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(CheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<CheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      unsigned Pointer = PositionMap[MI->getPointer()];
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (CheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain number of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable. If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(CheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// \brief Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
        PSE(PSE) {}

  /// \brief Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// \brief Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// \brief Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no-alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride,
                            bool Assume);

  /// \brief Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// \brief Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// \brief Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// \brief Go over all memory accesses and check whether runtime pointer
  /// checks are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  const DataLayout &DL;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// \brief Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
  /// while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// \brief Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// \brief Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
  DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool NeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 1;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    for (auto A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
      MemAccessInfo Access(Ptr, IsWrite);

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;

      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
                                RunningDepId, ASId, ShouldCheckWrap, false)) {
        DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
        Retries.push_back(Access);
        CanDoAliasSetRT = false;
      }
    }

    // If we have at least two writes or one write and a read then we need to
    // check them. But there is no need for checks if there is only one
    // dependence set for this alias set.
    //
    // Note that this function computes CanDoRT and NeedRTCheck independently.
    // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
    // for which we couldn't find the bounds but we don't actually need to emit
    // any checks so it does not matter.
    bool NeedsAliasSetRTCheck = false;
    if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
      NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
                             (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoAliasSetRT flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries)
        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
                                  TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          break;
        }
    }

    CanDoRT &= CanDoAliasSetRT;
    NeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
                        " different address spaces\n");
        return false;
      }
    }
  }

  if (NeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
               << " pointer comparisons.\n");

  RtCheck.Need = NeedRTCheck;

  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  DEBUG(dbgs() << "  AST: "; AST.dump());
  DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                                         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write, check other reads and writes for conflicts. If
          // this is a read, only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<Value *, 16> ValueVector;
          ValueVector TempObjects;

          GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
          DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
          for (Value *UnderlyingObj : TempObjects) {
            // nullptr never aliases; don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}
/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
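///
/// For illustration only (an informal IR sketch, not from the original
/// comments): in
///   %add = add nsw i32 %i, 1
///   %gep = getelementptr inbounds float, float* %a, i32 %add
/// the inbounds GEP together with the NSW flag on the only non-constant
/// index lets us conclude that %gep cannot wrap, even when SCEV has not
/// propagated any no-wrap flags to the pointer AddRec itself.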
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// \brief Check whether the access through \p Ptr has a constant stride.
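///
/// For illustration only (informal): for an i32 access A[2*i] the AddRec
/// step is 8 bytes while the element size is 4 bytes, so the returned
/// stride is 2. A return value of 0 means that no usable constant stride
/// could be established.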
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                           const Loop *Lp, const ValueToValueMap &StridesMap,
                           bool Assume, bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  auto *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr
                 << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                 << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
          *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                   << "LAA:   Pointer: " << *Ptr << "\n"
                   << "LAA:   SCEV: " << *AR << "\n"
                   << "LAA:   Added an overflow assumption\n");
    } else {
      DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                   << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
          " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
      Stride != 1 && Stride != -1) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                   << "inbounds or in address space 0 may wrap:\n"
                   << "LAA:   Pointer: " << *Ptr << "\n"
                   << "LAA:   SCEV: " << *AR << "\n"
                   << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}

/// Take the pointer operand from the Load/Store instruction.
/// Returns NULL if this is not a valid Load/Store instruction.
static Value *getPointerOperand(Value *I) {
  if (auto *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperand();
  if (auto *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand();
  return nullptr;
}

/// Take the address space operand from the Load/Store instruction.
/// Returns -1 if this is not a valid Load/Store instruction.
static unsigned getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I))
    return S->getPointerAddressSpace();
  return -1;
}

/// Returns true if the memory operations \p A and \p B are consecutive.
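///
/// For illustration only (informal): two i32 loads, one from %a and one
/// from "getelementptr inbounds (i32, i32* %a, i64 1)", are consecutive:
/// the accumulated constant offset delta (4 bytes) equals the store size
/// of i32, so the second access starts exactly where the first one ends.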
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB))
    return false;

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return false;

  // Make sure that A and B have the same type if required.
  if (CheckType && PtrA->getType() != PtrB->getType())
    return false;

  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));

  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  //  OffsetDelta = OffsetB - OffsetA;
  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
  if (PtrA == PtrB)
    return OffsetDelta == Size;

  // Compute the necessary base pointer delta to have the necessary final delta
  // equal to the size.
  // BaseDelta = Size - OffsetDelta;
  const SCEV *SizeSCEV = SE.getConstant(Size);
  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);

  // Otherwise compute the distance with SCEV between the base pointers.
  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
  return X == PtrSCEVB;
}

bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return true;

  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor, store-load forwarding does not take place.
  // Positive dependences might cause trouble because vectorizing them might
  // prevent store-load forwarding, making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the loads from a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.
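
  // For illustration only (informal, assuming TypeByteSize == 4, so
  // NumItersForStoreLoadThroughMemory == 32): a distance of 12 bytes fails
  // the check below for VF == 8 (12 % 8 != 0 and 12 / 8 == 1 < 32), which
  // caps the safe VF at 4 bytes, below 2 * TypeByteSize, so a potential
  // conflict is reported. A distance of 256 bytes is a multiple of every
  // VF tried and passes cleanly.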

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iterations between the store and the load is
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >>= 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    DEBUG(dbgs() << "LAA: Distance " << Distance
                 << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
/// possible to prove statically that the dependence distance is larger
/// than the range that the accesses will travel through the execution of
/// the loop. If so, return true; false otherwise. This is useful for
/// example in loops such as the following (PR31098):
///   for (i = 0; i < D; ++i) {
///            = out[i];
///     out[i+D] =
///   }
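///
/// For illustration only (informal): in the loop above the dependence
/// distance is D * sizeof(out[0]) bytes, while the pointers only advance by
/// BackedgeTakenCount * Step = (D - 1) * sizeof(out[0]) bytes, so
/// |Dist| > BackedgeTakenCount * Step holds and the two accesses can never
/// touch the same location within this loop.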
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &BackedgeTakenCount,
                                     const SCEV &Dist, uint64_t Stride,
                                     uint64_t TypeByteSize) {

  // If we can prove that
  //      (**) |Dist| > BackedgeTakenCount * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > BackedgeTakenCount).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; This is checked elsewhere.
  // But in some cases we can prune unknown dependence distances early, and
  // even before selecting the VF, and without a runtime test, by comparing
  // the distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  //
  const uint64_t ByteStride = Stride * TypeByteSize;
  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSize > ProductTypeSize)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is  Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  return false;
}

/// \brief Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not a multiple of the type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);

  DEBUG(dbgs() << "LAA: Src Scev: " << *Src << " Sink Scev: " << *Sink
               << " (Induction step: " << StrideAPtr << ")\n");
  DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
               << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();
  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
  uint64_t Stride = std::abs(StrideAPtr);
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
        isSafeDependenceDistance(DL, *(PSE.getSE()),
                                 *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                 TypeByteSize))
      return Dependence::NoDep;

    DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    ShouldRetryWithRuntimeCheck = true;
    return Dependence::Unknown;
  }

  const APInt &Val = C->getAPInt();
  int64_t Distance = Val.getSExtValue();

  // Attempt to prove strided accesses independent.
  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
      areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
    DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
    return Dependence::NoDep;
  }

  // Negative distances are not plausible dependencies.
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    if (IsTrueDataDependence && EnableForwardingConflictDetection &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
         ATy != BTy)) {
      DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
      return Dependence::ForwardButPreventsForwarding;
    }

    DEBUG(dbgs() << "LAA: Dependence is negative\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  // Could be improved to assert type sizes are the same (i32 == float, etc).
  if (Val == 0) {
    if (ATy == BTy)
      return Dependence::Forward;
    DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (ATy != BTy) {
    DEBUG(dbgs() <<
          "LAA: ReadWrite-Write positive dependency with different types\n");
    return Dependence::Unknown;
  }
1469 
1470  // Bail out early if passed-in parameters make vectorization not feasible.
1471  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1473  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1475  // The minimum number of iterations for a vectorized/unrolled version.
1476  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1477 
1478  // It's not vectorizable if the distance is smaller than the minimum distance
1479  // needed for a vectroized/unrolled version. Vectorizing one iteration in
1480  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1481  // TypeByteSize (No need to plus the last gap distance).
1482  //
1483  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1484  // foo(int *A) {
1485  // int *B = (int *)((char *)A + 14);
1486  // for (i = 0 ; i < 1024 ; i += 2)
1487  // B[i] = A[i] + 1;
1488  // }
1489  //
1490  // Two accesses in memory (stride is 2):
1491  // | A[0] | | A[2] | | A[4] | | A[6] | |
1492  // | B[0] | | B[2] | | B[4] |
1493  //
1494  // Distance needs for vectorizing iterations except the last iteration:
1495  // 4 * 2 * (MinNumIter - 1). Distance needs for the last iteration: 4.
1496  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1497  //
1498  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1499  // 12, which is less than distance.
1500  //
1501  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
1502  // the minimum distance needed is 28, which is greater than distance. It is
1503  // not safe to do vectorization.
1504  uint64_t MinDistanceNeeded =
1505  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1506  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1507  DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
1508  << '\n');
1509  return Dependence::Backward;
1510  }
1511 
1512  // Unsafe if the minimum distance needed is greater than max safe distance.
1513  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1514  DEBUG(dbgs() << "LAA: Failure because it needs at least "
1515  << MinDistanceNeeded << " size in bytes");
1516  return Dependence::Backward;
1517  }
1518 
1519  // Positive distance bigger than max vectorization factor.
1520  // FIXME: Should use max factor instead of max distance in bytes, which could
1521  // not handle different types.
1522  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1523  // void foo (int *A, char *B) {
1524  // for (unsigned i = 0; i < 1024; i++) {
1525  // A[i+2] = A[i] + 1;
1526  // B[i+2] = B[i] + 1;
1527  // }
1528  // }
1529  //
1530  // This case is currently unsafe according to the max safe distance. If we
1531  // analyze the two accesses on array B first, the max safe dependence distance
1532  // is 2. When we then analyze the accesses on array A, the minimum distance
1533  // needed is 8, which is greater than 2, so vectorization is forbidden. But
1534  // actually both A and B could safely be vectorized by a factor of 2.
1535  MaxSafeDepDistBytes =
1536  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1537 
1538  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1539  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1540  couldPreventStoreLoadForward(Distance, TypeByteSize))
1541  return Dependence::BackwardVectorizableButPreventsForwarding;
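 // E.g. (illustrative, assuming 4-byte elements): a backward dependence at a
 // distance of 12 bytes can pass the distance checks above, but a 16-byte
 // vector load would only partially overlap the vector store placed 12 bytes
 // earlier. Hardware cannot forward the pending store to such a load, so
 // every iteration would pay a store-to-load-forwarding stall;
 // couldPreventStoreLoadForward() flags exactly these distances.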
1542 
1543  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1544  DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1545  << " with max VF = " << MaxVF << '\n');
1546  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1547  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1548  return Dependence::BackwardVectorizable;
1549 }
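// Illustrative use of the verdicts above (a sketch, not part of this file):
// a client collapses a DepType into a yes/no answer with
// Dependence::isSafeForVectorization(), which accepts NoDep, Forward and
// BackwardVectorizable:
//
//   MemoryDepChecker::Dependence::DepType T = /* ...from isDependent()... */;
//   bool OK = MemoryDepChecker::Dependence::isSafeForVectorization(T);
//
// areDepsSafe() below performs exactly this accumulation over all pairs.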
1550 
1551 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1552  MemAccessInfoList &CheckDeps,
1553  const ValueToValueMap &Strides) {
1554 
1555  MaxSafeDepDistBytes = -1;
1556  SmallPtrSet<MemAccessInfo, 8> Visited;
1557  for (MemAccessInfo CurAccess : CheckDeps) {
1558  if (Visited.count(CurAccess))
1559  continue;
1560 
1561  // Get the relevant memory access set.
1562  EquivalenceClasses<MemAccessInfo>::iterator I =
1563  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1564 
1565  // Check accesses within this set.
1566  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1567  AccessSets.member_begin(I);
1568  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1569  AccessSets.member_end();
1570 
1571  // Check every access pair.
1572  while (AI != AE) {
1573  Visited.insert(*AI);
1574  EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
1575  while (OI != AE) {
1576  // Check every accessing instruction pair in program order.
1577  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1578  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1579  for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1580  I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1581  auto A = std::make_pair(&*AI, *I1);
1582  auto B = std::make_pair(&*OI, *I2);
1583 
1584  assert(*I1 != *I2);
1585  if (*I1 > *I2)
1586  std::swap(A, B);
1587 
1588  Dependence::DepType Type =
1589  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1590  SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1591 
1592  // Gather dependences unless we accumulated MaxDependences
1593  // dependences. In that case return as soon as we find the first
1594  // unsafe dependence. This puts a limit on this quadratic
1595  // algorithm.
1596  if (RecordDependences) {
1597  if (Type != Dependence::NoDep)
1598  Dependences.push_back(Dependence(A.second, B.second, Type));
1599 
1600  if (Dependences.size() >= MaxDependences) {
1601  RecordDependences = false;
1602  Dependences.clear();
1603  DEBUG(dbgs() << "Too many dependences, stopped recording\n");
1604  }
1605  }
1606  if (!RecordDependences && !SafeForVectorization)
1607  return false;
1608  }
1609  ++OI;
1610  }
1611  AI++;
1612  }
1613  }
1614 
1615  DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1616  return SafeForVectorization;
1617 }
1618 
1619 SmallVector<Instruction *, 4>
1620 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1621  MemAccessInfo Access(Ptr, isWrite);
1622  auto &IndexVector = Accesses.find(Access)->second;
1623 
1624  SmallVector<Instruction *, 4> Insts;
1625  transform(IndexVector,
1626  std::back_inserter(Insts),
1627  [&](unsigned Idx) { return this->InstMap[Idx]; });
1628  return Insts;
1629 }
1630 
1631 const char *MemoryDepChecker::Dependence::DepName[] = {
1632  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1633  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1634 
1635 void MemoryDepChecker::Dependence::print(
1636  raw_ostream &OS, unsigned Depth,
1637  const SmallVectorImpl<Instruction *> &Instrs) const {
1638  OS.indent(Depth) << DepName[Type] << ":\n";
1639  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1640  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1641 }
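// For a forward dependence between a store and a load this prints roughly
// (illustrative output; the two instructions appear in full IR syntax):
//
//   Forward:
//       store i32 %x, i32* %arrayidx, align 4 ->
//       %y = load i32, i32* %arrayidx2, align 4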
1642 
1643 bool LoopAccessInfo::canAnalyzeLoop() {
1644  // We need to have a loop header.
1645  DEBUG(dbgs() << "LAA: Found a loop in "
1646  << TheLoop->getHeader()->getParent()->getName() << ": "
1647  << TheLoop->getHeader()->getName() << '\n');
1648 
1649  // We can only analyze innermost loops.
1650  if (!TheLoop->empty()) {
1651  DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1652  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1653  return false;
1654  }
1655 
1656  // We must have a single backedge.
1657  if (TheLoop->getNumBackEdges() != 1) {
1658  DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1659  recordAnalysis("CFGNotUnderstood")
1660  << "loop control flow is not understood by analyzer";
1661  return false;
1662  }
1663 
1664  // We must have a single exiting block.
1665  if (!TheLoop->getExitingBlock()) {
1666  DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1667  recordAnalysis("CFGNotUnderstood")
1668  << "loop control flow is not understood by analyzer";
1669  return false;
1670  }
1671 
1672  // We only handle bottom-tested loops, i.e. loops in which the condition is
1673  // checked at the end of each iteration. With that we can assume that all
1674  // instructions in the loop are executed the same number of times.
1675  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1676  DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1677  recordAnalysis("CFGNotUnderstood")
1678  << "loop control flow is not understood by analyzer";
1679  return false;
1680  }
1681 
1682  // ScalarEvolution needs to be able to find the exit count.
1683  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1684  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1685  recordAnalysis("CantComputeNumberOfIterations")
1686  << "could not determine number of loop iterations";
1687  DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1688  return false;
1689  }
1690 
1691  return true;
1692 }
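// Illustratively, a canonical counted loop (as produced by -loop-rotate)
// such as
//
//   for (int i = 0; i < n; ++i)
//     A[i] = B[i] + 1;
//
// passes all checks above: it is innermost, has a single backedge, and its
// single exiting block is the latch, with a SCEV-computable backedge count.
// A loop with a 'break' in mid-body has an exiting block that is not the
// latch and is rejected.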
1693 
1694 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1695  const TargetLibraryInfo *TLI,
1696  DominatorTree *DT) {
1697  typedef SmallPtrSet<Value*, 16> ValueSet;
1698 
1699  // Holds the Load and Store instructions.
1700  SmallVector<LoadInst *, 16> Loads;
1701  SmallVector<StoreInst *, 16> Stores;
1702 
1703  // Holds all the different accesses in the loop.
1704  unsigned NumReads = 0;
1705  unsigned NumReadWrites = 0;
1706 
1707  PtrRtChecking->Pointers.clear();
1708  PtrRtChecking->Need = false;
1709 
1710  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1711 
1712  // For each block.
1713  for (BasicBlock *BB : TheLoop->blocks()) {
1714  // Scan the BB and collect legal loads and stores.
1715  for (Instruction &I : *BB) {
1716  // If this is a load, save it. If this instruction can read from memory
1717  // but is not a load, then we quit. Notice that we don't handle function
1718  // calls that read or write.
1719  if (I.mayReadFromMemory()) {
1720  // Many math library functions read the rounding mode. We will only
1721  // vectorize a loop if it contains known function calls that don't set
1722  // the flag. Therefore, it is safe to ignore this read from memory.
1723  auto *Call = dyn_cast<CallInst>(&I);
1724  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1725  continue;
1726 
1727  // If the function has an explicit vectorized counterpart, we can safely
1728  // assume that it can be vectorized.
1729  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1730  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1731  continue;
1732 
1733  auto *Ld = dyn_cast<LoadInst>(&I);
1734  if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1735  recordAnalysis("NonSimpleLoad", Ld)
1736  << "read with atomic ordering or volatile read";
1737  DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1738  CanVecMem = false;
1739  return;
1740  }
1741  NumLoads++;
1742  Loads.push_back(Ld);
1743  DepChecker->addAccess(Ld);
1744  if (EnableMemAccessVersioning)
1745  collectStridedAccess(Ld);
1746  continue;
1747  }
1748 
1749  // Save 'store' instructions. Abort if other instructions write to memory.
1750  if (I.mayWriteToMemory()) {
1751  auto *St = dyn_cast<StoreInst>(&I);
1752  if (!St) {
1753  recordAnalysis("CantVectorizeInstruction", St)
1754  << "instruction cannot be vectorized";
1755  CanVecMem = false;
1756  return;
1757  }
1758  if (!St->isSimple() && !IsAnnotatedParallel) {
1759  recordAnalysis("NonSimpleStore", St)
1760  << "write with atomic ordering or volatile write";
1761  DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1762  CanVecMem = false;
1763  return;
1764  }
1765  NumStores++;
1766  Stores.push_back(St);
1767  DepChecker->addAccess(St);
1768  if (EnableMemAccessVersioning)
1769  collectStridedAccess(St);
1770  }
1771  } // Next instr.
1772  } // Next block.
1773 
1774  // Now we have two lists that hold the loads and the stores.
1775  // Next, we find the pointers that they use.
1776 
1777  // Check if we see any stores. If there are no stores, then we don't
1778  // care if the pointers are *restrict*.
1779  if (!Stores.size()) {
1780  DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1781  CanVecMem = true;
1782  return;
1783  }
1784 
1785  MemoryDepChecker::DepCandidates DependentAccesses;
1786  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1787  AA, LI, DependentAccesses, *PSE);
1788 
1789  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1790  // multiple times on the same object. If the ptr is accessed twice, once
1791  // for read and once for write, it will only appear once (on the write
1792  // list). This is okay, since we are going to check for conflicts between
1793  // writes and between reads and writes, but not between reads and reads.
1794  ValueSet Seen;
1795 
1796  for (StoreInst *ST : Stores) {
1797  Value *Ptr = ST->getPointerOperand();
1798  // Check for store to loop invariant address.
1799  StoreToLoopInvariantAddress |= isUniform(Ptr);
1800  // If we did *not* see this pointer before, insert it to the read-write
1801  // list. At this phase it is only a 'write' list.
1802  if (Seen.insert(Ptr).second) {
1803  ++NumReadWrites;
1804 
1805  MemoryLocation Loc = MemoryLocation::get(ST);
1806  // The TBAA metadata could have a control dependency on the predication
1807  // condition, so we cannot rely on it when determining whether or not we
1808  // need runtime pointer checks.
1809  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1810  Loc.AATags.TBAA = nullptr;
1811 
1812  Accesses.addStore(Loc);
1813  }
1814  }
1815 
1816  if (IsAnnotatedParallel) {
1817  DEBUG(dbgs()
1818  << "LAA: The loop is annotated parallel, ignoring memory dependency "
1819  << "checks.\n");
1820  CanVecMem = true;
1821  return;
1822  }
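// A loop is "annotated parallel" when its memory accesses carry
// llvm.mem.parallel_loop_access metadata referring to the loop's ID, e.g.
// (illustrative IR, as emitted by frontends for '#pragma omp simd' and the
// like):
//
//   store i32 %v, i32* %p, align 4, !llvm.mem.parallel_loop_access !0
//   br i1 %cond, label %for.body, label %for.end, !llvm.loop !0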
1823 
1824  for (LoadInst *LD : Loads) {
1825  Value *Ptr = LD->getPointerOperand();
1826  // If we did *not* see this pointer before, insert it to the
1827  // read list. If we *did* see it before, then it is already in
1828  // the read-write list. This allows us to vectorize expressions
1829  // such as A[i] += x; Because the address of A[i] is a read-write
1830  // pointer. This only works if the index of A[i] is consecutive.
1831  // If the address of i is unknown (for example A[B[i]]) then we may
1832  // read a few words, modify, and write a few words, and some of the
1833  // words may be written to the same address.
1834  bool IsReadOnlyPtr = false;
1835  if (Seen.insert(Ptr).second ||
1836  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1837  ++NumReads;
1838  IsReadOnlyPtr = true;
1839  }
1840 
1841  MemoryLocation Loc = MemoryLocation::get(LD);
1842  // The TBAA metadata could have a control dependency on the predication
1843  // condition, so we cannot rely on it when determining whether or not we
1844  // need runtime pointer checks.
1845  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1846  Loc.AATags.TBAA = nullptr;
1847 
1848  Accesses.addLoad(Loc, IsReadOnlyPtr);
1849  }
1850 
1851  // If we write (or read-write) to a single destination and there are no
1852  // other reads in this loop, then it is safe to vectorize.
1853  if (NumReadWrites == 1 && NumReads == 0) {
1854  DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1855  CanVecMem = true;
1856  return;
1857  }
1858 
1859  // Build dependence sets and check whether we need a runtime pointer bounds
1860  // check.
1861  Accesses.buildDependenceSets();
1862 
1863  // Find pointers with computable bounds. We are going to use this information
1864  // to place a runtime bound check.
1865  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1866  TheLoop, SymbolicStrides);
1867  if (!CanDoRTIfNeeded) {
1868  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1869  DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1870  << "the array bounds.\n");
1871  CanVecMem = false;
1872  return;
1873  }
1874 
1875  DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1876 
1877  CanVecMem = true;
1878  if (Accesses.isDependencyCheckNeeded()) {
1879  DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1880  CanVecMem = DepChecker->areDepsSafe(
1881  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1882  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1883 
1884  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1885  DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1886 
1887  // Clear the dependency checks. We assume they are not needed.
1888  Accesses.resetDepChecks(*DepChecker);
1889 
1890  PtrRtChecking->reset();
1891  PtrRtChecking->Need = true;
1892 
1893  auto *SE = PSE->getSE();
1894  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1895  SymbolicStrides, true);
1896 
1897  // Check that we found the bounds for the pointer.
1898  if (!CanDoRTIfNeeded) {
1899  recordAnalysis("CantCheckMemDepsAtRunTime")
1900  << "cannot check memory dependencies at runtime";
1901  DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1902  CanVecMem = false;
1903  return;
1904  }
1905 
1906  CanVecMem = true;
1907  }
1908  }
1909 
1910  if (CanVecMem)
1911  DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
1912  << (PtrRtChecking->Need ? "" : " don't")
1913  << " need runtime memory checks.\n");
1914  else {
1915  recordAnalysis("UnsafeMemDep")
1916  << "unsafe dependent memory operations in loop. Use "
1917  "#pragma loop distribute(enable) to allow loop distribution "
1918  "to attempt to isolate the offending operations into a separate "
1919  "loop";
1920  DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
1921  }
1922 }
1923 
1924 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
1925  DominatorTree *DT) {
1926  assert(TheLoop->contains(BB) && "Unknown block used");
1927 
1928  // Blocks that do not dominate the latch need predication.
1929  BasicBlock* Latch = TheLoop->getLoopLatch();
1930  return !DT->dominates(BB, Latch);
1931 }
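// E.g. (illustrative):
//
//   for (i = 0; i < n; ++i) {
//     if (c[i])      // header block, dominates the latch
//       A[i] = 0;    // conditional block, does not dominate the latch
//   }                // latch
//
// The conditional store's block needs predication: a vectorized loop would
// otherwise execute the store for lanes where c[i] is false.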
1932 
1933 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
1934  Instruction *I) {
1935  assert(!Report && "Multiple reports generated");
1936 
1937  Value *CodeRegion = TheLoop->getHeader();
1938  DebugLoc DL = TheLoop->getStartLoc();
1939 
1940  if (I) {
1941  CodeRegion = I->getParent();
1942  // If there is no debug location attached to the instruction, fall back to
1943  // using the loop's.
1944  if (I->getDebugLoc())
1945  DL = I->getDebugLoc();
1946  }
1947 
1948  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
1949  CodeRegion);
1950  return *Report;
1951 }
1952 
1953 bool LoopAccessInfo::isUniform(Value *V) const {
1954  auto *SE = PSE->getSE();
1955  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
1956  // never considered uniform.
1957  // TODO: Is this really what we want? Even without FP SCEV, we may want some
1958  // trivially loop-invariant FP values to be considered uniform.
1959  if (!SE->isSCEVable(V->getType()))
1960  return false;
1961  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
1962 }
1963 
1964 // FIXME: this function is currently a duplicate of the one in
1965 // LoopVectorize.cpp.
1966 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
1967  Instruction *Loc) {
1968  if (FirstInst)
1969  return FirstInst;
1970  if (Instruction *I = dyn_cast<Instruction>(V))
1971  return I->getParent() == Loc->getParent() ? I : nullptr;
1972  return nullptr;
1973 }
1974 
1975 namespace {
1976 
1977 /// \brief IR Values for the lower and upper bounds of a pointer evolution. We
1978 /// need to use value-handles because SCEV expansion can invalidate previously
1979 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
1980 /// a previous one.
1981 struct PointerBounds {
1982  TrackingVH<Value> Start;
1983  TrackingVH<Value> End;
1984 };
1985 
1986 } // end anonymous namespace
1987 
1988 /// \brief Expand code for the lower and upper bound of the pointer group \p CG
1989 /// in \p TheLoop. \return the values for the bounds.
1990 static PointerBounds
1991 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
1992  Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
1993  const RuntimePointerChecking &PtrRtChecking) {
1994  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
1995  const SCEV *Sc = SE->getSCEV(Ptr);
1996 
1997  unsigned AS = Ptr->getType()->getPointerAddressSpace();
1998  LLVMContext &Ctx = Loc->getContext();
1999 
2000  // Use this type for pointer arithmetic.
2001  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2002 
2003  if (SE->isLoopInvariant(Sc, TheLoop)) {
2004  DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
2005  << "\n");
2006  // Ptr could be in the loop body. If so, expand a new one at the correct
2007  // location.
2008  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2009  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2010  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2011  : Ptr;
2012  // We must return a half-open range, which means incrementing Sc.
2013  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2014  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2015  return {NewPtr, NewPtrPlusOne};
2016  } else {
2017  Value *Start = nullptr, *End = nullptr;
2018  DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2019  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2020  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2021  DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
2022  return {Start, End};
2023  }
2024 }
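// E.g. (illustrative): for a loop-invariant pointer %p the emitted range is
// the single-byte, half-open interval [%p, %p + 1). For a strided access the
// bounds come from the group's Low/High SCEVs, roughly
// [&A[0], &A[n-1] + sizeof(elt)) for A[i] with i = 0..n-1.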
2025 
2026 /// \brief Turns a collection of checks into a collection of expanded upper and
2027 /// lower bounds for both pointers in the check.
2028 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2029  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2030  Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2031  const RuntimePointerChecking &PtrRtChecking) {
2032  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2033 
2034  // Here we're relying on the SCEV Expander's cache to only emit code for the
2035  // same bounds once.
2036  transform(
2037  PointerChecks, std::back_inserter(ChecksWithBounds),
2038  [&](const RuntimePointerChecking::PointerCheck &Check) {
2039  PointerBounds
2040  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2041  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2042  return std::make_pair(First, Second);
2043  });
2044 
2045  return ChecksWithBounds;
2046 }
2047 
2048 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2049  Instruction *Loc,
2050  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2051  const {
2052  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2053  auto *SE = PSE->getSE();
2054  SCEVExpander Exp(*SE, DL, "induction");
2055  auto ExpandedChecks =
2056  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2057 
2058  LLVMContext &Ctx = Loc->getContext();
2059  Instruction *FirstInst = nullptr;
2060  IRBuilder<> ChkBuilder(Loc);
2061  // Our instructions might fold to a constant.
2062  Value *MemoryRuntimeCheck = nullptr;
2063 
2064  for (const auto &Check : ExpandedChecks) {
2065  const PointerBounds &A = Check.first, &B = Check.second;
2066  // Check if two pointers (A and B) conflict where conflict is computed as:
2067  // start(A) <= end(B) && start(B) <= end(A)
2068  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2069  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2070 
2071  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2072  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2073  "Trying to bounds check pointers with different address spaces");
2074 
2075  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2076  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2077 
2078  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2079  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2080  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2081  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2082 
2083  // [A|B].Start points to the first accessed byte under base [A|B].
2084  // [A|B].End points to the last accessed byte, plus one.
2085  // There is no conflict when the intervals are disjoint:
2086  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2087  //
2088  // bound0 = (B.Start < A.End)
2089  // bound1 = (A.Start < B.End)
2090  // IsConflict = bound0 & bound1
2091  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2092  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2093  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2094  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2095  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2096  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2097  if (MemoryRuntimeCheck) {
2098  IsConflict =
2099  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2100  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2101  }
2102  MemoryRuntimeCheck = IsConflict;
2103  }
2104 
2105  if (!MemoryRuntimeCheck)
2106  return std::make_pair(nullptr, nullptr);
2107 
2108  // We have to do this trickery because the IRBuilder might fold the check to a
2109  // constant expression, in which case there is no Instruction anchored in
2110  // the block.
2111  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2112  ConstantInt::getTrue(Ctx));
2113  ChkBuilder.Insert(Check, "memcheck.conflict");
2114  FirstInst = getFirstInst(FirstInst, Check, Loc);
2115  return std::make_pair(FirstInst, Check);
2116 }
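// For two pointer groups the emitted check reduces to IR of this shape
// (illustrative; %a.*/%b.* are hypothetical expanded bounds, the value
// names come from the builder calls above):
//
//   %bound0 = icmp ult i8* %a.start, %b.end
//   %bound1 = icmp ult i8* %b.start, %a.end
//   %found.conflict = and i1 %bound0, %bound1
//   %conflict.rdx = or i1 %prev.conflict, %found.conflict
//   %memcheck.conflict = and i1 %conflict.rdx, true
//
// The trailing 'and ... true' only guarantees that an Instruction is
// anchored in the block, as explained above.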
2117 
2118 std::pair<Instruction *, Instruction *>
2119 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2120  if (!PtrRtChecking->Need)
2121  return std::make_pair(nullptr, nullptr);
2122 
2123  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2124 }
2125 
2126 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2127  Value *Ptr = nullptr;
2128  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2129  Ptr = LI->getPointerOperand();
2130  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2131  Ptr = SI->getPointerOperand();
2132  else
2133  return;
2134 
2135  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2136  if (!Stride)
2137  return;
2138 
2139  DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2140  "versioning:");
2141  DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2142 
2143  // Avoid adding the "Stride == 1" predicate when we know that
2144  // Stride >= Trip-Count. Such a predicate would effectively version a loop
2145  // that executes at most once, since Trip-Count <= Stride == 1.
2146  //
2147  // TODO: We are currently not making a very informed decision on when it is
2148  // beneficial to apply stride versioning. It might make more sense that the
2149  // users of this analysis (such as the vectorizer) will trigger it, based on
2150  // their specific cost considerations; For example, in cases where stride
2151  // versioning does not help resolving memory accesses/dependences, the
2152  // vectorizer should evaluate the cost of the runtime test, and the benefit
2153  // of various possible stride specializations, considering the alternatives
2154  // of using gather/scatters (if available).
2155 
2156  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2157  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2158 
2159  // Match the types so we can compare the stride and the BETakenCount.
2160  // The Stride can be positive/negative, so we sign extend Stride;
2161  // the backedge-taken count is non-negative, so we zero extend BETakenCount.
2162  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2163  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2164  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2165  const SCEV *CastedStride = StrideExpr;
2166  const SCEV *CastedBECount = BETakenCount;
2167  ScalarEvolution *SE = PSE->getSE();
2168  if (BETypeSize >= StrideTypeSize)
2169  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2170  else
2171  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2172  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2173  // Since TripCount == BackEdgeTakenCount + 1, checking:
2174  // "Stride >= TripCount" is equivalent to checking:
2175  // Stride - BETakenCount > 0
2176  if (SE->isKnownPositive(StrideMinusBETaken)) {
2177  DEBUG(dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2178  "Stride==1 predicate will imply that the loop executes "
2179  "at most once.\n");
2180  return;
2181  }
2182  DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
2183 
2184  SymbolicStrides[Ptr] = Stride;
2185  StrideSet.insert(Stride);
2186 }
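// E.g. (illustrative): in
//
//   for (i = 0; i < n; ++i)
//     A[i * stride] += 1;
//
// with a provable 'stride >= n', adding the 'stride == 1' predicate would
// only version a loop that executes at most once (n <= stride == 1), so the
// stride is not recorded for versioning.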
2187 
2188 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2189  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2190  DominatorTree *DT, LoopInfo *LI)
2191  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2192  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2193  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2194  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2195  StoreToLoopInvariantAddress(false) {
2196  if (canAnalyzeLoop())
2197  analyzeLoop(AA, LI, TLI, DT);
2198 }
2199 
2200 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2201  if (CanVecMem) {
2202  OS.indent(Depth) << "Memory dependences are safe";
2203  if (MaxSafeDepDistBytes != -1ULL)
2204  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2205  << " bytes";
2206  if (PtrRtChecking->Need)
2207  OS << " with run-time checks";
2208  OS << "\n";
2209  }
2210 
2211  if (Report)
2212  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2213 
2214  if (auto *Dependences = DepChecker->getDependences()) {
2215  OS.indent(Depth) << "Dependences:\n";
2216  for (auto &Dep : *Dependences) {
2217  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2218  OS << "\n";
2219  }
2220  } else
2221  OS.indent(Depth) << "Too many dependences, not recorded\n";
2222 
2223  // List the pairs of accesses that need run-time checks to prove independence.
2224  PtrRtChecking->print(OS, Depth);
2225  OS << "\n";
2226 
2227  OS.indent(Depth) << "Store to invariant address was "
2228  << (StoreToLoopInvariantAddress ? "" : "not ")
2229  << "found in loop.\n";
2230 
2231  OS.indent(Depth) << "SCEV assumptions:\n";
2232  PSE->getUnionPredicate().print(OS, Depth);
2233 
2234  OS << "\n";
2235 
2236  OS.indent(Depth) << "Expressions re-written:\n";
2237  PSE->print(OS, Depth);
2238 }
2239 
2240 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2241  auto &LAI = LoopAccessInfoMap[L];
2242 
2243  if (!LAI)
2244  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2245 
2246  return *LAI.get();
2247 }
2248 
2249 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2250  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2251 
2252  for (Loop *TopLevelLoop : *LI)
2253  for (Loop *L : depth_first(TopLevelLoop)) {
2254  OS.indent(2) << L->getHeader()->getName() << ":\n";
2255  auto &LAI = LAA.getInfo(L);
2256  LAI.print(OS, 4);
2257  }
2258 }
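// Run as 'opt -loop-accesses -analyze', this produces per-loop reports of
// roughly this shape (illustrative):
//
//   for.body:
//       Memory dependences are safe with run-time checks
//       Dependences:
//       Run-time memory checks:
//       ...
//       Store to invariant address was not found in loop.
//       SCEV assumptions:
//       Expressions re-written: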
2259 
2260 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2261  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2262  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2263  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2264  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2265  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2266  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2267 
2268  return false;
2269 }
2270 
2271 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2272  AU.addRequired<ScalarEvolutionWrapperPass>();
2273  AU.addRequired<AAResultsWrapperPass>();
2274  AU.addRequired<DominatorTreeWrapperPass>();
2275  AU.addRequired<LoopInfoWrapperPass>();
2276 
2277  AU.setPreservesAll();
2278 }
2279 
2280 char LoopAccessLegacyAnalysis::ID = 0;
2281 static const char laa_name[] = "Loop Access Analysis";
2282 #define LAA_NAME "loop-accesses"
2283 
2284 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2285 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2286 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2287 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2288 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2289 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2290 
2291 AnalysisKey LoopAccessAnalysis::Key;
2292 
2293 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2294  LoopStandardAnalysisResults &AR) {
2295  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2296 }
2297 
2298 namespace llvm {
2299 
2300  Pass *createLAAPass() {
2301  return new LoopAccessLegacyAnalysis();
2302  }
2303 
2304 } // end namespace llvm
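// Illustrative driver sketch (hypothetical client code, not part of LLVM):
// outside of 'opt', the legacy analysis can be scheduled explicitly:
//
//   #include "llvm/IR/LegacyPassManager.h"
//   ...
//   llvm::legacy::PassManager PM;
//   PM.add(llvm::createLAAPass());  // dependencies are scheduled via
//   PM.run(M);                      // getAnalysisUsage(); M is a Module.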