LLVM 8.0.0svn
LoopAccessAnalysis.cpp
1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The implementation of the loop memory dependence analysis that was
11 // originally developed for the loop vectorizer.
12 //
13 //===----------------------------------------------------------------------===//
14 
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstrTypes.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
59 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstdlib>
66 #include <iterator>
67 #include <utility>
68 #include <vector>
69 
70 using namespace llvm;
71 
72 #define DEBUG_TYPE "loop-accesses"
73 
74 static cl::opt<unsigned, true>
75 VectorizationFactor("force-vector-width", cl::Hidden,
76  cl::desc("Sets the SIMD width. Zero is autoselect."),
77  cl::location(VectorizerParams::VectorizationFactor));
78 unsigned VectorizerParams::VectorizationFactor;
79 
80 static cl::opt<unsigned, true>
81 VectorizationInterleave("force-vector-interleave", cl::Hidden,
82  cl::desc("Sets the vectorization interleave count. "
83  "Zero is autoselect."),
84  cl::location(
85  VectorizerParams::VectorizationInterleave));
86 unsigned VectorizerParams::VectorizationInterleave;
87 
88 static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
89  "runtime-memory-check-threshold", cl::Hidden,
90  cl::desc("When performing memory disambiguation checks at runtime do not "
91  "generate more than this number of comparisons (default = 8)."),
92  cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
93 unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
94 
95 /// The maximum number of iterations used when merging memory checks.
96 static cl::opt<unsigned> MemoryCheckMergeThreshold(
97  "memory-check-merge-threshold", cl::Hidden,
98  cl::desc("Maximum number of comparisons done when trying to merge "
99  "runtime memory checks. (default = 100)"),
100  cl::init(100));
101 
102 /// Maximum SIMD width.
103 const unsigned VectorizerParams::MaxVectorWidth = 64;
104 
105 /// We collect dependences up to this threshold.
106 static cl::opt<unsigned>
107  MaxDependences("max-dependences", cl::Hidden,
108  cl::desc("Maximum number of dependences collected by "
109  "loop-access analysis (default = 100)"),
110  cl::init(100));
111 
112 /// This enables versioning on the strides of symbolically striding memory
113 /// accesses in code like the following.
114 /// for (i = 0; i < N; ++i)
115 /// A[i * Stride1] += B[i * Stride2] ...
116 ///
117 /// Will be roughly translated to
118 /// if (Stride1 == 1 && Stride2 == 1) {
119 /// for (i = 0; i < N; i+=4)
120 /// A[i:i+3] += ...
121 /// } else
122 /// ...
124  "enable-mem-access-versioning", cl::init(true), cl::Hidden,
125  cl::desc("Enable symbolic stride memory access versioning"));
126 
127 /// Enable store-to-load forwarding conflict detection. This option can
128 /// be disabled for correctness testing.
130  "store-to-load-forwarding-conflict-detection", cl::Hidden,
131  cl::desc("Enable conflict detection in loop-access analysis"),
132  cl::init(true));
133 
134 bool VectorizerParams::isInterleaveForced() {
135  return ::VectorizationInterleave.getNumOccurrences() > 0;
136 }
137 
138 Value *llvm::stripIntegerCast(Value *V) {
139  if (auto *CI = dyn_cast<CastInst>(V))
140  if (CI->getOperand(0)->getType()->isIntegerTy())
141  return CI->getOperand(0);
142  return V;
143 }
144 
145 const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
146  const ValueToValueMap &PtrToStride,
147  Value *Ptr, Value *OrigPtr) {
148  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
149 
150  // If there is an entry in the map return the SCEV of the pointer with the
151  // symbolic stride replaced by one.
152  ValueToValueMap::const_iterator SI =
153  PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
154  if (SI != PtrToStride.end()) {
155  Value *StrideVal = SI->second;
156 
157  // Strip casts.
158  StrideVal = stripIntegerCast(StrideVal);
159 
160  ScalarEvolution *SE = PSE.getSE();
161  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
162  const auto *CT =
163  static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
164 
165  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
166  auto *Expr = PSE.getSCEV(Ptr);
167 
168  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
169  << " by: " << *Expr << "\n");
170  return Expr;
171  }
172 
173  // Otherwise, just return the SCEV of the original pointer.
174  return OrigSCEV;
175 }
176 
177 /// Calculate Start and End points of memory access.
178 /// Let's assume A is the first access and B is a memory access on N-th loop
179 /// iteration. Then B is calculated as:
180 /// B = A + Step*N .
181 /// Step value may be positive or negative.
182 /// N is a calculated back-edge taken count:
183 /// N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
184 /// Start and End points are calculated in the following way:
185 /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
186 /// where SizeOfElt is the size of single memory access in bytes.
187 ///
188 /// There is no conflict when the intervals are disjoint:
189 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
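/// For illustration (hypothetical values, not from the original source): with
/// 4-byte elements, A = %base, Step = 4 and N = 99, the interval becomes
/// Start = %base and End = %base + 99*4 + 4, so two such intervals conflict
/// only if they overlap according to the formula above.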
190 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
191  unsigned DepSetId, unsigned ASId,
192  const ValueToValueMap &Strides,
193  PredicatedScalarEvolution &PSE) {
194  // Get the stride replaced scev.
195  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
196  ScalarEvolution *SE = PSE.getSE();
197 
198  const SCEV *ScStart;
199  const SCEV *ScEnd;
200 
201  if (SE->isLoopInvariant(Sc, Lp))
202  ScStart = ScEnd = Sc;
203  else {
204  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
205  assert(AR && "Invalid addrec expression");
206  const SCEV *Ex = PSE.getBackedgeTakenCount();
207 
208  ScStart = AR->getStart();
209  ScEnd = AR->evaluateAtIteration(Ex, *SE);
210  const SCEV *Step = AR->getStepRecurrence(*SE);
211 
212  // For expressions with negative step, the upper bound is ScStart and the
213  // lower bound is ScEnd.
214  if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
215  if (CStep->getValue()->isNegative())
216  std::swap(ScStart, ScEnd);
217  } else {
218  // Fallback case: the step is not constant, but we can still
219  // get the upper and lower bounds of the interval by using min/max
220  // expressions.
221  ScStart = SE->getUMinExpr(ScStart, ScEnd);
222  ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
223  }
224  // Add the size of the pointed element to ScEnd.
225  unsigned EltSize =
226  Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
227  const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
228  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
229  }
230 
231  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
232 }
233 
234 SmallVector<RuntimePointerChecking::PointerCheck, 4>
235 RuntimePointerChecking::generateChecks() const {
236  SmallVector<PointerCheck, 4> Checks;
237 
238  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
239  for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
240  const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
241  const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
242 
243  if (needsChecking(CGI, CGJ))
244  Checks.push_back(std::make_pair(&CGI, &CGJ));
245  }
246  }
247  return Checks;
248 }
249 
250 void RuntimePointerChecking::generateChecks(
251  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
252  assert(Checks.empty() && "Checks is not empty");
253  groupChecks(DepCands, UseDependencies);
254  Checks = generateChecks();
255 }
256 
257 bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
258  const CheckingPtrGroup &N) const {
259  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
260  for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
261  if (needsChecking(M.Members[I], N.Members[J]))
262  return true;
263  return false;
264 }
265 
266 /// Compare \p I and \p J and return the minimum.
267 /// Return nullptr in case we couldn't find an answer.
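/// As a hypothetical illustration: for I = %n and J = %n + 4, Diff is the
/// constant 4 (non-negative), so I is returned; if Diff were not a constant,
/// the result would be nullptr.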
268 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
269  ScalarEvolution *SE) {
270  const SCEV *Diff = SE->getMinusSCEV(J, I);
271  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
272 
273  if (!C)
274  return nullptr;
275  if (C->getValue()->isNegative())
276  return J;
277  return I;
278 }
279 
280 bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
281  const SCEV *Start = RtCheck.Pointers[Index].Start;
282  const SCEV *End = RtCheck.Pointers[Index].End;
283 
284  // Compare the starts and ends with the known minimum and maximum
285  // of this set. We need to know how we compare against the min/max
286  // of the set in order to be able to emit memchecks.
287  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
288  if (!Min0)
289  return false;
290 
291  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
292  if (!Min1)
293  return false;
294 
295  // Update the low bound expression if we've found a new min value.
296  if (Min0 == Start)
297  Low = Start;
298 
299  // Update the high bound expression if we've found a new max value.
300  if (Min1 != End)
301  High = End;
302 
303  Members.push_back(Index);
304  return true;
305 }
306 
307 void RuntimePointerChecking::groupChecks(
308  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
309  // We build the groups from dependency candidates equivalence classes
310  // because:
311  // - We know that pointers in the same equivalence class share
312  // the same underlying object and therefore there is a chance
313  // that we can compare pointers
314  // - We wouldn't be able to merge two pointers for which we need
315  // to emit a memcheck. The classes in DepCands are already
316  // conveniently built such that no two pointers in the same
317  // class need checking against each other.
318 
319  // We use the following (greedy) algorithm to construct the groups
320  // For every pointer in the equivalence class:
321  // For each existing group:
322  // - if the difference between this pointer and the min/max bounds
323  // of the group is a constant, then make the pointer part of the
324  // group and update the min/max bounds of that group as required.
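  // As a hypothetical illustration of the merging step: if a group currently
  // spans [%a, %a+400) and the next pointer's interval is [%a+400, %a+404),
  // its distance from the group's bounds is a constant, so the pointer is
  // added and the group's High is raised to %a+404; a pointer whose distance
  // from the bounds is symbolic (e.g. involves an unknown stride %m) would
  // start its own group.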
325 
326  CheckingGroups.clear();
327 
328  // If we need to check two pointers to the same underlying object
329  // with a non-constant difference, we shouldn't perform any pointer
330  // grouping with those pointers. This is because we can easily get
331  // into cases where the resulting check would return false, even when
332  // the accesses are safe.
333  //
334  // The following example shows this:
335  // for (i = 0; i < 1000; ++i)
336  // a[5000 + i * m] = a[i] + a[i + 9000]
337  //
338  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
339  // (0, 10000) which is always false. However, if m is 1, there is no
340  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
341  // us to perform an accurate check in this case.
342  //
343  // The above case requires that we have an UnknownDependence between
344  // accesses to the same underlying object. This cannot happen unless
345  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
346  // is also false. In this case we will use the fallback path and create
347  // separate checking groups for all pointers.
348 
349  // If we don't have the dependency partitions, construct a new
350  // checking pointer group for each pointer. This is also required
351  // for correctness, because in this case we can have checking between
352  // pointers to the same underlying object.
353  if (!UseDependencies) {
354  for (unsigned I = 0; I < Pointers.size(); ++I)
355  CheckingGroups.push_back(CheckingPtrGroup(I, *this));
356  return;
357  }
358 
359  unsigned TotalComparisons = 0;
360 
361  DenseMap<Value *, unsigned> PositionMap;
362  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
363  PositionMap[Pointers[Index].PointerValue] = Index;
364 
365  // We need to keep track of what pointers we've already seen so we
366  // don't process them twice.
367  SmallSet<unsigned, 2> Seen;
368 
369  // Go through all equivalence classes, get the "pointer check groups"
370  // and add them to the overall solution. We use the order in which accesses
371  // appear in 'Pointers' to enforce determinism.
372  for (unsigned I = 0; I < Pointers.size(); ++I) {
373  // We've seen this pointer before, and therefore already processed
374  // its equivalence class.
375  if (Seen.count(I))
376  continue;
377 
378  MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
379  Pointers[I].IsWritePtr);
380 
381  SmallVector<CheckingPtrGroup, 2> Groups;
382  auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
383 
384  // Because DepCands is constructed by visiting accesses in the order in
385  // which they appear in alias sets (which is deterministic) and the
386  // iteration order within an equivalence class member is only dependent on
387  // the order in which unions and insertions are performed on the
388  // equivalence class, the iteration order is deterministic.
389  for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
390  MI != ME; ++MI) {
391  unsigned Pointer = PositionMap[MI->getPointer()];
392  bool Merged = false;
393  // Mark this pointer as seen.
394  Seen.insert(Pointer);
395 
396  // Go through all the existing sets and see if we can find one
397  // which can include this pointer.
398  for (CheckingPtrGroup &Group : Groups) {
399  // Don't perform more than a certain amount of comparisons.
400  // This should limit the cost of grouping the pointers to something
401  // reasonable. If we do end up hitting this threshold, the algorithm
402  // will create separate groups for all remaining pointers.
403  if (TotalComparisons > MemoryCheckMergeThreshold)
404  break;
405 
406  TotalComparisons++;
407 
408  if (Group.addPointer(Pointer)) {
409  Merged = true;
410  break;
411  }
412  }
413 
414  if (!Merged)
415  // We couldn't add this pointer to any existing set or the threshold
416  // for the number of comparisons has been reached. Create a new group
417  // to hold the current pointer.
418  Groups.push_back(CheckingPtrGroup(Pointer, *this));
419  }
420 
421  // We've computed the grouped checks for this partition.
422  // Save the results and continue with the next one.
423  std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
424  }
425 }
426 
427 bool RuntimePointerChecking::arePointersInSamePartition(
428  const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
429  unsigned PtrIdx2) {
430  return (PtrToPartition[PtrIdx1] != -1 &&
431  PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
432 }
433 
434 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
435  const PointerInfo &PointerI = Pointers[I];
436  const PointerInfo &PointerJ = Pointers[J];
437 
438  // No need to check if two readonly pointers intersect.
439  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
440  return false;
441 
442  // Only need to check pointers between two different dependency sets.
443  if (PointerI.DependencySetId == PointerJ.DependencySetId)
444  return false;
445 
446  // Only need to check pointers in the same alias set.
447  if (PointerI.AliasSetId != PointerJ.AliasSetId)
448  return false;
449 
450  return true;
451 }
452 
453 void RuntimePointerChecking::printChecks(
454  raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
455  unsigned Depth) const {
456  unsigned N = 0;
457  for (const auto &Check : Checks) {
458  const auto &First = Check.first->Members, &Second = Check.second->Members;
459 
460  OS.indent(Depth) << "Check " << N++ << ":\n";
461 
462  OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
463  for (unsigned K = 0; K < First.size(); ++K)
464  OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
465 
466  OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
467  for (unsigned K = 0; K < Second.size(); ++K)
468  OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
469  }
470 }
471 
472 void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
473 
474  OS.indent(Depth) << "Run-time memory checks:\n";
475  printChecks(OS, Checks, Depth);
476 
477  OS.indent(Depth) << "Grouped accesses:\n";
478  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
479  const auto &CG = CheckingGroups[I];
480 
481  OS.indent(Depth + 2) << "Group " << &CG << ":\n";
482  OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
483  << ")\n";
484  for (unsigned J = 0; J < CG.Members.size(); ++J) {
485  OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
486  << "\n";
487  }
488  }
489 }
490 
491 namespace {
492 
493 /// Analyses memory accesses in a loop.
494 ///
495 /// Checks whether run time pointer checks are needed and builds sets for data
496 /// dependence checking.
497 class AccessAnalysis {
498 public:
499  /// Read or write access location.
500  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
501  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
502 
503  AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
504  LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
505  PredicatedScalarEvolution &PSE)
506  : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
507  IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
508 
509  /// Register a load and whether it is only read from.
510  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
511  Value *Ptr = const_cast<Value*>(Loc.Ptr);
512  AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
513  Accesses.insert(MemAccessInfo(Ptr, false));
514  if (IsReadOnly)
515  ReadOnlyPtr.insert(Ptr);
516  }
517 
518  /// Register a store.
519  void addStore(MemoryLocation &Loc) {
520  Value *Ptr = const_cast<Value*>(Loc.Ptr);
521  AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
522  Accesses.insert(MemAccessInfo(Ptr, true));
523  }
524 
525  /// Check if we can emit a run-time no-alias check for \p Access.
526  ///
527  /// Returns true if we can emit a run-time no alias check for \p Access.
528  /// If we can check this access, this also adds it to a dependence set and
529 /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
530  /// we will attempt to use additional run-time checks in order to get
531  /// the bounds of the pointer.
532  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
533  MemAccessInfo Access,
534  const ValueToValueMap &Strides,
535  DenseMap<Value *, unsigned> &DepSetId,
536  Loop *TheLoop, unsigned &RunningDepId,
537  unsigned ASId, bool ShouldCheckStride,
538  bool Assume);
539 
540  /// Check whether we can check the pointers at runtime for
541  /// non-intersection.
542  ///
543  /// Returns true if we need no check or if we do and we can generate them
544  /// (i.e. the pointers have computable bounds).
545  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
546  Loop *TheLoop, const ValueToValueMap &Strides,
547  bool ShouldCheckWrap = false);
548 
549  /// Goes over all memory accesses, checks whether a RT check is needed
550  /// and builds sets of dependent accesses.
551  void buildDependenceSets() {
552  processMemAccesses();
553  }
554 
555  /// Initial processing of memory accesses determined that we need to
556  /// perform dependency checking.
557  ///
558  /// Note that this can later be cleared if we retry memcheck analysis without
559  /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
560  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
561 
562  /// We decided that no dependence analysis would be used. Reset the state.
563  void resetDepChecks(MemoryDepChecker &DepChecker) {
564  CheckDeps.clear();
565  DepChecker.clearDependences();
566  }
567 
568  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
569 
570 private:
571  typedef SetVector<MemAccessInfo> PtrAccessSet;
572 
573  /// Go over all memory accesses and check whether runtime pointer checks
574  /// are needed and build sets of dependency check candidates.
575  void processMemAccesses();
576 
577  /// Set of all accesses.
578  PtrAccessSet Accesses;
579 
580  const DataLayout &DL;
581 
582  /// The loop being checked.
583  const Loop *TheLoop;
584 
585  /// List of accesses that need a further dependence check.
586  MemAccessInfoList CheckDeps;
587 
588  /// Set of pointers that are read only.
589  SmallPtrSet<Value*, 16> ReadOnlyPtr;
590 
591  /// An alias set tracker to partition the access set by underlying object and
592  /// intrinsic property (such as TBAA metadata).
593  AliasSetTracker AST;
594 
595  LoopInfo *LI;
596 
597  /// Sets of potentially dependent accesses - members of one set share an
598 /// underlying pointer. The set "CheckDeps" identifies which sets really need a
599 /// dependence check.
600  MemoryDepChecker::DepCandidates &DepCands;
601 
602  /// Initial processing of memory accesses determined that we may need
603  /// to add memchecks. Perform the analysis to determine the necessary checks.
604  ///
605  /// Note that, this is different from isDependencyCheckNeeded. When we retry
606  /// memcheck analysis without dependency checking
607  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
608  /// while this remains set if we have potentially dependent accesses.
609  bool IsRTCheckAnalysisNeeded;
610 
611  /// The SCEV predicate containing all the SCEV-related assumptions.
612  PredicatedScalarEvolution &PSE;
613 };
614 
615 } // end anonymous namespace
616 
617 /// Check whether a pointer can participate in a runtime bounds check.
618 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
619 /// by adding run-time checks (overflow checks) if necessary.
620 static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
621  const ValueToValueMap &Strides, Value *Ptr,
622  Loop *L, bool Assume) {
623  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
624 
625  // The bounds for a loop-invariant pointer are trivial.
626  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
627  return true;
628 
629  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
630 
631  if (!AR && Assume)
632  AR = PSE.getAsAddRec(Ptr);
633 
634  if (!AR)
635  return false;
636 
637  return AR->isAffine();
638 }
639 
640 /// Check whether a pointer address cannot wrap.
641 static bool isNoWrap(PredicatedScalarEvolution &PSE,
642  const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
643  const SCEV *PtrScev = PSE.getSCEV(Ptr);
644  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
645  return true;
646 
647  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
648  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
649  return true;
650 
651  return false;
652 }
653 
654 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
655  MemAccessInfo Access,
656  const ValueToValueMap &StridesMap,
657  DenseMap<Value *, unsigned> &DepSetId,
658  Loop *TheLoop, unsigned &RunningDepId,
659  unsigned ASId, bool ShouldCheckWrap,
660  bool Assume) {
661  Value *Ptr = Access.getPointer();
662 
663  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
664  return false;
665 
666  // When we run after a failing dependency check we have to make sure
667  // we don't have wrapping pointers.
668  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
669  auto *Expr = PSE.getSCEV(Ptr);
670  if (!Assume || !isa<SCEVAddRecExpr>(Expr))
671  return false;
672  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
673  }
674 
675  // The id of the dependence set.
676  unsigned DepId;
677 
678  if (isDependencyCheckNeeded()) {
679  Value *Leader = DepCands.getLeaderValue(Access).getPointer();
680  unsigned &LeaderId = DepSetId[Leader];
681  if (!LeaderId)
682  LeaderId = RunningDepId++;
683  DepId = LeaderId;
684  } else
685  // Each access has its own dependence set.
686  DepId = RunningDepId++;
687 
688  bool IsWrite = Access.getInt();
689  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
690  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
691 
692  return true;
693  }
694 
695 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
696  ScalarEvolution *SE, Loop *TheLoop,
697  const ValueToValueMap &StridesMap,
698  bool ShouldCheckWrap) {
699  // Find pointers with computable bounds. We are going to use this information
700  // to place a runtime bound check.
701  bool CanDoRT = true;
702 
703  bool NeedRTCheck = false;
704  if (!IsRTCheckAnalysisNeeded) return true;
705 
706  bool IsDepCheckNeeded = isDependencyCheckNeeded();
707 
708  // We assign a consecutive id to accesses from different alias sets.
709  // Accesses between different groups don't need to be checked.
710  unsigned ASId = 1;
711  for (auto &AS : AST) {
712  int NumReadPtrChecks = 0;
713  int NumWritePtrChecks = 0;
714  bool CanDoAliasSetRT = true;
715 
716  // We assign consecutive ids to accesses from different dependence sets.
717  // Accesses within the same set don't need a runtime check.
718  unsigned RunningDepId = 1;
719  DenseMap<Value *, unsigned> DepSetId;
720 
721  SmallVector<MemAccessInfo, 4> Retries;
722 
723  for (auto A : AS) {
724  Value *Ptr = A.getValue();
725  bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
726  MemAccessInfo Access(Ptr, IsWrite);
727 
728  if (IsWrite)
729  ++NumWritePtrChecks;
730  else
731  ++NumReadPtrChecks;
732 
733  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
734  RunningDepId, ASId, ShouldCheckWrap, false)) {
735  LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
736  Retries.push_back(Access);
737  CanDoAliasSetRT = false;
738  }
739  }
740 
741  // If we have at least two writes or one write and a read then we need to
742  // check them. But there is no need for checks if there is only one
743  // dependence set for this alias set.
744  //
745  // Note that this function computes CanDoRT and NeedRTCheck independently.
746  // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
747  // for which we couldn't find the bounds but we don't actually need to emit
748  // any checks so it does not matter.
749  bool NeedsAliasSetRTCheck = false;
750  if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
751  NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
752  (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
753 
754  // We need to perform run-time alias checks, but some pointers had bounds
755  // that couldn't be checked.
756  if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
757  // Reset the CanDoSetRt flag and retry all accesses that have failed.
758  // We know that we need these checks, so we can now be more aggressive
759  // and add further checks if required (overflow checks).
760  CanDoAliasSetRT = true;
761  for (auto Access : Retries)
762  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
763  TheLoop, RunningDepId, ASId,
764  ShouldCheckWrap, /*Assume=*/true)) {
765  CanDoAliasSetRT = false;
766  break;
767  }
768  }
769 
770  CanDoRT &= CanDoAliasSetRT;
771  NeedRTCheck |= NeedsAliasSetRTCheck;
772  ++ASId;
773  }
774 
775  // If the pointers that we would use for the bounds comparison have different
776  // address spaces, assume the values aren't directly comparable, so we can't
777  // use them for the runtime check. We also have to assume they could
778  // overlap. In the future there should be metadata for whether address spaces
779  // are disjoint.
780  unsigned NumPointers = RtCheck.Pointers.size();
781  for (unsigned i = 0; i < NumPointers; ++i) {
782  for (unsigned j = i + 1; j < NumPointers; ++j) {
783  // Only need to check pointers between two different dependency sets.
784  if (RtCheck.Pointers[i].DependencySetId ==
785  RtCheck.Pointers[j].DependencySetId)
786  continue;
787  // Only need to check pointers in the same alias set.
788  if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
789  continue;
790 
791  Value *PtrI = RtCheck.Pointers[i].PointerValue;
792  Value *PtrJ = RtCheck.Pointers[j].PointerValue;
793 
794  unsigned ASi = PtrI->getType()->getPointerAddressSpace();
795  unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
796  if (ASi != ASj) {
797  LLVM_DEBUG(
798  dbgs() << "LAA: Runtime check would require comparison between"
799  " different address spaces\n");
800  return false;
801  }
802  }
803  }
804 
805  if (NeedRTCheck && CanDoRT)
806  RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
807 
808  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
809  << " pointer comparisons.\n");
810 
811  RtCheck.Need = NeedRTCheck;
812 
813  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
814  if (!CanDoRTIfNeeded)
815  RtCheck.reset();
816  return CanDoRTIfNeeded;
817 }
818 
819 void AccessAnalysis::processMemAccesses() {
820  // We process the set twice: first we process read-write pointers, last we
821  // process read-only pointers. This allows us to skip dependence tests for
822  // read-only pointers.
823 
824  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
825  LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
826  LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
827  LLVM_DEBUG({
828  for (auto A : Accesses)
829  dbgs() << "\t" << *A.getPointer() << " (" <<
830  (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
831  "read-only" : "read")) << ")\n";
832  });
833 
834  // The AliasSetTracker has nicely partitioned our pointers by metadata
835  // compatibility and potential for underlying-object overlap. As a result, we
836  // only need to check for potential pointer dependencies within each alias
837  // set.
838  for (auto &AS : AST) {
839  // Note that both the alias-set tracker and the alias sets themselves use
840  // linked lists internally and so the iteration order here is deterministic
841  // (matching the original instruction order within each set).
842 
843  bool SetHasWrite = false;
844 
845  // Map of pointers to last access encountered.
846  typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
847  UnderlyingObjToAccessMap ObjToLastAccess;
848 
849  // Set of accesses to check after all writes have been processed.
850  PtrAccessSet DeferredAccesses;
851 
852  // Iterate over each alias set twice, once to process read/write pointers,
853  // and then to process read-only pointers.
854  for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
855  bool UseDeferred = SetIteration > 0;
856  PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
857 
858  for (auto AV : AS) {
859  Value *Ptr = AV.getValue();
860 
861  // For a single memory access in AliasSetTracker, Accesses may contain
862  // both read and write, and they both need to be handled for CheckDeps.
863  for (auto AC : S) {
864  if (AC.getPointer() != Ptr)
865  continue;
866 
867  bool IsWrite = AC.getInt();
868 
869  // If we're using the deferred access set, then it contains only
870  // reads.
871  bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
872  if (UseDeferred && !IsReadOnlyPtr)
873  continue;
874  // Otherwise, the pointer must be in the PtrAccessSet, either as a
875  // read or a write.
876  assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
877  S.count(MemAccessInfo(Ptr, false))) &&
878  "Alias-set pointer not in the access set?");
879 
880  MemAccessInfo Access(Ptr, IsWrite);
881  DepCands.insert(Access);
882 
883  // Memorize read-only pointers for later processing and skip them in
884  // the first round (they need to be checked after we have seen all
885  // write pointers). Note: we also mark pointers that are not
886  // consecutive as "read-only" pointers (so that we check
887  // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
888  if (!UseDeferred && IsReadOnlyPtr) {
889  DeferredAccesses.insert(Access);
890  continue;
891  }
892 
893  // If this is a write - check other reads and writes for conflicts. If
894  // this is a read, only check other writes for conflicts (but only if
895  // there is no other write to the ptr - this is an optimization to
896  // catch "a[i] = a[i] + " without having to do a dependence check).
897  if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
898  CheckDeps.push_back(Access);
899  IsRTCheckAnalysisNeeded = true;
900  }
901 
902  if (IsWrite)
903  SetHasWrite = true;
904 
905  // Create sets of pointers connected by a shared alias set and
906  // underlying object.
907  typedef SmallVector<Value *, 16> ValueVector;
908  ValueVector TempObjects;
909 
910  GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
911  LLVM_DEBUG(dbgs()
912  << "Underlying objects for pointer " << *Ptr << "\n");
913  for (Value *UnderlyingObj : TempObjects) {
914  // nullptr never aliases; don't join sets for pointers that have "null"
915  // in their UnderlyingObjects list.
916  if (isa<ConstantPointerNull>(UnderlyingObj) &&
917  !NullPointerIsDefined(
918  TheLoop->getHeader()->getParent(),
919  UnderlyingObj->getType()->getPointerAddressSpace()))
920  continue;
921 
922  UnderlyingObjToAccessMap::iterator Prev =
923  ObjToLastAccess.find(UnderlyingObj);
924  if (Prev != ObjToLastAccess.end())
925  DepCands.unionSets(Access, Prev->second);
926 
927  ObjToLastAccess[UnderlyingObj] = Access;
928  LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
929  }
930  }
931  }
932  }
933  }
934 }
935 
936 static bool isInBoundsGep(Value *Ptr) {
937  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
938  return GEP->isInBounds();
939  return false;
940 }
941 
942 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
943 /// i.e. monotonically increasing/decreasing.
944 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
945  PredicatedScalarEvolution &PSE, const Loop *L) {
946  // FIXME: This should probably only return true for NUW.
947  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
948  return true;
949 
950  // Scalar evolution does not propagate the non-wrapping flags to values that
951  // are derived from a non-wrapping induction variable because non-wrapping
952  // could be flow-sensitive.
953  //
954  // Look through the potentially overflowing instruction to try to prove
955  // non-wrapping for the *specific* value of Ptr.
956 
957  // The arithmetic implied by an inbounds GEP can't overflow.
958  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
959  if (!GEP || !GEP->isInBounds())
960  return false;
961 
962  // Make sure there is only one non-const index and analyze that.
963  Value *NonConstIndex = nullptr;
964  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
965  if (!isa<ConstantInt>(Index)) {
966  if (NonConstIndex)
967  return false;
968  NonConstIndex = Index;
969  }
970  if (!NonConstIndex)
971  // The recurrence is on the pointer, ignore for now.
972  return false;
973 
974  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
975  // AddRec using a NSW operation.
976  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
977  if (OBO->hasNoSignedWrap() &&
978  // Assume constant for the other operand so that the AddRec can be
979  // easily found.
980  isa<ConstantInt>(OBO->getOperand(1))) {
981  auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
982 
983  if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
984  return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
985  }
986 
987  return false;
988 }
989 
990 /// Check whether the access through \p Ptr has a constant stride.
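/// For illustration (hypothetical values, not from the original source): for
/// an i32 access whose address recurrence steps by 8 bytes per iteration, the
/// returned stride is 8 / 4 = 2; a non-constant or non-divisible step yields 0.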
991 int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
992  const Loop *Lp, const ValueToValueMap &StridesMap,
993  bool Assume, bool ShouldCheckWrap) {
994  Type *Ty = Ptr->getType();
995  assert(Ty->isPointerTy() && "Unexpected non-ptr");
996 
997  // Make sure that the pointer does not point to aggregate types.
998  auto *PtrTy = cast<PointerType>(Ty);
999  if (PtrTy->getElementType()->isAggregateType()) {
1000  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
1001  << *Ptr << "\n");
1002  return 0;
1003  }
1004 
1005  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1006 
1007  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1008  if (Assume && !AR)
1009  AR = PSE.getAsAddRec(Ptr);
1010 
1011  if (!AR) {
1012  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1013  << " SCEV: " << *PtrScev << "\n");
1014  return 0;
1015  }
1016 
1017  // The access function must stride over the innermost loop.
1018  if (Lp != AR->getLoop()) {
1019  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1020  << *Ptr << " SCEV: " << *AR << "\n");
1021  return 0;
1022  }
1023 
1024  // The address calculation must not wrap. Otherwise, a dependence could be
1025  // inverted.
1026  // An inbounds getelementptr that is an AddRec with a unit stride
1027  // cannot wrap per definition. The unit stride requirement is checked later.
1028  // A getelementptr without an inbounds attribute and unit stride would have
1029  // to access the pointer value "0" which is undefined behavior in address
1030  // space 0, therefore we can also vectorize this case.
1031  bool IsInBoundsGEP = isInBoundsGep(Ptr);
1032  bool IsNoWrapAddRec = !ShouldCheckWrap ||
1033  PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
1034  isNoWrapAddRec(Ptr, AR, PSE, Lp);
1035  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1036  NullPointerIsDefined(Lp->getHeader()->getParent(),
1037  PtrTy->getAddressSpace())) {
1038  if (Assume) {
1039  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1040  IsNoWrapAddRec = true;
1041  LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1042  << "LAA: Pointer: " << *Ptr << "\n"
1043  << "LAA: SCEV: " << *AR << "\n"
1044  << "LAA: Added an overflow assumption\n");
1045  } else {
1046  LLVM_DEBUG(
1047  dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1048  << *Ptr << " SCEV: " << *AR << "\n");
1049  return 0;
1050  }
1051  }
1052 
1053  // Check the step is constant.
1054  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1055 
1056  // Calculate the pointer stride and check if it is constant.
1057  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1058  if (!C) {
1059  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1060  << " SCEV: " << *AR << "\n");
1061  return 0;
1062  }
1063 
1064  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1065  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1066  const APInt &APStepVal = C->getAPInt();
1067 
1068  // Huge step value - give up.
1069  if (APStepVal.getBitWidth() > 64)
1070  return 0;
1071 
1072  int64_t StepVal = APStepVal.getSExtValue();
1073 
1074  // Strided access.
1075  int64_t Stride = StepVal / Size;
1076  int64_t Rem = StepVal % Size;
1077  if (Rem)
1078  return 0;
1079 
1080  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1081  // know we can't "wrap around the address space". In case of address space
1082  // zero we know that this won't happen without triggering undefined behavior.
1083  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1084  (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
1085  PtrTy->getAddressSpace()))) {
1086  if (Assume) {
1087  // We can avoid this case by adding a run-time check.
1088  LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1089  << "inbouds or in address space 0 may wrap:\n"
1090  << "LAA: Pointer: " << *Ptr << "\n"
1091  << "LAA: SCEV: " << *AR << "\n"
1092  << "LAA: Added an overflow assumption\n");
1093  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1094  } else
1095  return 0;
1096  }
1097 
1098  return Stride;
1099 }
1100 
1101 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
1102  ScalarEvolution &SE,
1103  SmallVectorImpl<unsigned> &SortedIndices) {
1104  assert(llvm::all_of(
1105  VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1106  "Expected list of pointer operands.");
1107  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
1108  OffValPairs.reserve(VL.size());
1109 
1110  // Walk over the pointers, and map each of them to an offset relative to
1111  // the first pointer in the array.
1112  Value *Ptr0 = VL[0];
1113  const SCEV *Scev0 = SE.getSCEV(Ptr0);
1114  Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1115 
1116  SmallSet<int64_t, 4> Offsets;
1117  for (auto *Ptr : VL) {
1118  // TODO: Outline this code as a special, more time consuming, version of
1119  // computeConstantDifference() function.
1120  if (Ptr->getType()->getPointerAddressSpace() !=
1121  Ptr0->getType()->getPointerAddressSpace())
1122  return false;
1123  // If a pointer refers to a different underlying object, bail - the
1124  // pointers are by definition incomparable.
1125  Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1126  if (CurrObj != Obj0)
1127  return false;
1128 
1129  const SCEV *Scev = SE.getSCEV(Ptr);
1130  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
1131  // The pointers may not have a constant offset from each other, or SCEV
1132  // may just not be smart enough to figure out they do. Regardless,
1133  // there's nothing we can do.
1134  if (!Diff)
1135  return false;
1136 
1137  // Bail out if a pointer with the same offset has already been seen.
1138  int64_t Offset = Diff->getAPInt().getSExtValue();
1139  if (!Offsets.insert(Offset).second)
1140  return false;
1141  OffValPairs.emplace_back(Offset, Ptr);
1142  }
1143  SortedIndices.clear();
1144  SortedIndices.resize(VL.size());
1145  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
1146 
1147  // Sort the indices by the offset of the corresponding pointer.
1148  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
1149  [&OffValPairs](unsigned Left, unsigned Right) {
1150  return OffValPairs[Left].first < OffValPairs[Right].first;
1151  });
1152 
1153  // Check if the order is consecutive already.
1154  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
1155  return I == SortedIndices[I];
1156  }))
1157  SortedIndices.clear();
1158 
1159  return true;
1160 }
1161 
1162 /// Take the address space operand from the Load/Store instruction.
1163 /// Returns -1 if this is not a valid Load/Store instruction.
1164 static unsigned getAddressSpaceOperand(Value *I) {
1165  if (LoadInst *L = dyn_cast<LoadInst>(I))
1166  return L->getPointerAddressSpace();
1167  if (StoreInst *S = dyn_cast<StoreInst>(I))
1168  return S->getPointerAddressSpace();
1169  return -1;
1170 }
1171 
1172 /// Returns true if the memory operations \p A and \p B are consecutive.
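/// For illustration (hypothetical values): a load of a[i] followed by a load
/// of a[i+1] with 4-byte elements is consecutive because the second pointer is
/// exactly Size = 4 bytes past the first.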
1173 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1174  ScalarEvolution &SE, bool CheckType) {
1175  Value *PtrA = getLoadStorePointerOperand(A);
1176  Value *PtrB = getLoadStorePointerOperand(B);
1177  unsigned ASA = getAddressSpaceOperand(A);
1178  unsigned ASB = getAddressSpaceOperand(B);
1179 
1180  // Check that the address spaces match and that the pointers are valid.
1181  if (!PtrA || !PtrB || (ASA != ASB))
1182  return false;
1183 
1184  // Make sure that A and B are different pointers.
1185  if (PtrA == PtrB)
1186  return false;
1187 
1188  // Make sure that A and B have the same type if required.
1189  if (CheckType && PtrA->getType() != PtrB->getType())
1190  return false;
1191 
1192  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1193  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1194  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
1195 
1196  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1197  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1198  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1199 
1200  // OffsetDelta = OffsetB - OffsetA;
1201  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1202  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1203  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1204  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
1205  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
1206  // Check if they are based on the same pointer. That makes the offsets
1207  // sufficient.
1208  if (PtrA == PtrB)
1209  return OffsetDelta == Size;
1210 
1211  // Compute the necessary base pointer delta to have the necessary final delta
1212  // equal to the size.
1213  // BaseDelta = Size - OffsetDelta;
1214  const SCEV *SizeSCEV = SE.getConstant(Size);
1215  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1216 
1217  // Otherwise compute the distance with SCEV between the base pointers.
1218  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1219  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1220  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1221  return X == PtrSCEVB;
1222 }
1223 
1224 bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1225  switch (Type) {
1226  case NoDep:
1227  case Forward:
1228  case BackwardVectorizable:
1229  return true;
1230 
1231  case Unknown:
1232  case ForwardButPreventsForwarding:
1233  case Backward:
1234  case BackwardVectorizableButPreventsForwarding:
1235  return false;
1236  }
1237  llvm_unreachable("unexpected DepType!");
1238 }
1239 
1240 bool MemoryDepChecker::Dependence::isBackward() const {
1241  switch (Type) {
1242  case NoDep:
1243  case Forward:
1244  case ForwardButPreventsForwarding:
1245  case Unknown:
1246  return false;
1247 
1248  case BackwardVectorizable:
1249  case Backward:
1250  case BackwardVectorizableButPreventsForwarding:
1251  return true;
1252  }
1253  llvm_unreachable("unexpected DepType!");
1254 }
1255 
1256 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1257  return isBackward() || Type == Unknown;
1258 }
1259 
1260 bool MemoryDepChecker::Dependence::isForward() const {
1261  switch (Type) {
1262  case Forward:
1263  case ForwardButPreventsForwarding:
1264  return true;
1265 
1266  case NoDep:
1267  case Unknown:
1268  case BackwardVectorizable:
1269  case Backward:
1270  case BackwardVectorizableButPreventsForwarding:
1271  return false;
1272  }
1273  llvm_unreachable("unexpected DepType!");
1274 }
1275 
1276 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1277  uint64_t TypeByteSize) {
1278  // If loads occur at a distance that is not a multiple of a feasible vector
1279  // factor, store-load forwarding does not take place.
1280  // Positive dependences might cause troubles because vectorizing them might
1281  // prevent store-load forwarding, making vectorized code run a lot slower.
1282  // a[i] = a[i-3] ^ a[i-8];
1283  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1284  // hence on your typical architecture store-load forwarding does not take
1285  // place. Vectorizing in such cases does not make sense.
1286  // Store-load forwarding distance.
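  // As a hypothetical illustration: with TypeByteSize = 4 and Distance = 24,
  // the loop below rejects VF = 16 bytes (24 % 16 != 0 and 24 / 16 < 32
  // iterations) and caps MaxVFWithoutSLForwardIssues at 8 bytes before the
  // store and load become misaligned.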
1287 
1288  // After this many iterations store-to-load forwarding conflicts should not
1289  // cause any slowdowns.
1290  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1291  // Maximum vector factor.
1292  uint64_t MaxVFWithoutSLForwardIssues = std::min(
1293  VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1294 
1295  // Compute the smallest VF at which the store and load would be misaligned.
1296  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1297  VF *= 2) {
1298  // If the number of vector iterations between the store and the load is
1299  // small we could incur conflicts.
1300  if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1301  MaxVFWithoutSLForwardIssues = (VF >>= 1);
1302  break;
1303  }
1304  }
1305 
1306  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1307  LLVM_DEBUG(
1308  dbgs() << "LAA: Distance " << Distance
1309  << " that could cause a store-load forwarding conflict\n");
1310  return true;
1311  }
1312 
1313  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1314  MaxVFWithoutSLForwardIssues !=
1315  VectorizerParams::MaxVectorWidth * TypeByteSize)
1316  MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1317  return false;
1318 }
1319 
1320 /// Given a non-constant (unknown) dependence-distance \p Dist between two
1321 /// memory accesses, that have the same stride whose absolute value is given
1322 /// in \p Stride, and that have the same type size \p TypeByteSize,
1323 /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1324 /// possible to prove statically that the dependence distance is larger
1325 /// than the range that the accesses will travel through the execution of
1326 /// the loop. If so, return true; false otherwise. This is useful for
1327 /// example in loops such as the following (PR31098):
1328 /// for (i = 0; i < D; ++i) {
1329 /// = out[i];
1330 /// out[i+D] =
1331 /// }
1332 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1333  const SCEV &BackedgeTakenCount,
1334  const SCEV &Dist, uint64_t Stride,
1335  uint64_t TypeByteSize) {
1336 
1337  // If we can prove that
1338  // (**) |Dist| > BackedgeTakenCount * Step
1339  // where Step is the absolute stride of the memory accesses in bytes,
1340  // then there is no dependence.
1341  //
1342  // Rationale:
1343  // We basically want to check if the absolute distance (|Dist/Step|)
1344  // is >= the loop iteration count (or > BackedgeTakenCount).
1345  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1346  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1347  // that the dependence distance is >= VF; This is checked elsewhere.
1348  // But in some cases we can prune unknown dependence distances early, and
1349  // even before selecting the VF, and without a runtime test, by comparing
1350  // the distance against the loop iteration count. Since the vectorized code
1351  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1352  // also guarantees that distance >= VF.
1353  //
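  // As a hypothetical illustration matching the loop above: for 4-byte
  // accesses out[i] and out[i+D], Dist = 4*D, ByteStride = 4 and
  // BackedgeTakenCount = D-1, so Dist - BackedgeTakenCount*Step = 4 > 0 and
  // the accesses are proven independent without a runtime check.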
1354  const uint64_t ByteStride = Stride * TypeByteSize;
1355  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1356  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1357 
1358  const SCEV *CastedDist = &Dist;
1359  const SCEV *CastedProduct = Product;
1360  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1361  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1362 
1363  // The dependence distance can be positive/negative, so we sign extend Dist;
1364  // The multiplication of the absolute stride in bytes and the
1365  // backedgeTakenCount is non-negative, so we zero extend Product.
1366  if (DistTypeSize > ProductTypeSize)
1367  CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1368  else
1369  CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1370 
1371  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1372  // (If so, then we have proven (**) because |Dist| >= Dist)
1373  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1374  if (SE.isKnownPositive(Minus))
1375  return true;
1376 
1377  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1378  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1379  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1380  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1381  if (SE.isKnownPositive(Minus))
1382  return true;
1383 
1384  return false;
1385 }
1386 
1387 /// Check the dependence for two accesses with the same stride \p Stride.
1388 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1389 /// bytes.
1390 ///
1391 /// \returns true if they are independent.
1392 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1393  uint64_t TypeByteSize) {
1394  assert(Stride > 1 && "The stride must be greater than 1");
1395  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1396  assert(Distance > 0 && "The distance must be non-zero");
1397 
1398  // Skip if the distance is not a multiple of the type byte size.
1399  if (Distance % TypeByteSize)
1400  return false;
1401 
1402  uint64_t ScaledDist = Distance / TypeByteSize;
1403 
1404  // No dependence if the scaled distance is not a multiple of the stride.
1405  // E.g.
1406  // for (i = 0; i < 1024 ; i += 4)
1407  // A[i+2] = A[i] + 1;
1408  //
1409  // Two accesses in memory (scaled distance is 2, stride is 4):
1410  // | A[0] | | | | A[4] | | | |
1411  // | | | A[2] | | | | A[6] | |
1412  //
1413  // E.g.
1414  // for (i = 0; i < 1024 ; i += 3)
1415  // A[i+4] = A[i] + 1;
1416  //
1417  // Two accesses in memory (scaled distance is 4, stride is 3):
1418  // | A[0] | | | A[3] | | | A[6] | | |
1419  // | | | | | A[4] | | | A[7] | |
1420  return ScaledDist % Stride;
1421 }
1422 
1423 MemoryDepChecker::Dependence::DepType
1424 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1425  const MemAccessInfo &B, unsigned BIdx,
1426  const ValueToValueMap &Strides) {
1427  assert (AIdx < BIdx && "Must pass arguments in program order");
1428 
1429  Value *APtr = A.getPointer();
1430  Value *BPtr = B.getPointer();
1431  bool AIsWrite = A.getInt();
1432  bool BIsWrite = B.getInt();
1433 
1434  // Two reads are independent.
1435  if (!AIsWrite && !BIsWrite)
1436  return Dependence::NoDep;
1437 
1438  // We cannot check pointers in different address spaces.
1439  if (APtr->getType()->getPointerAddressSpace() !=
1440  BPtr->getType()->getPointerAddressSpace())
1441  return Dependence::Unknown;
1442 
1443  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1444  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1445 
1446  const SCEV *Src = PSE.getSCEV(APtr);
1447  const SCEV *Sink = PSE.getSCEV(BPtr);
1448 
1449  // If the induction step is negative we have to invert source and sink of the
1450  // dependence.
1451  if (StrideAPtr < 0) {
1452  std::swap(APtr, BPtr);
1453  std::swap(Src, Sink);
1454  std::swap(AIsWrite, BIsWrite);
1455  std::swap(AIdx, BIdx);
1456  std::swap(StrideAPtr, StrideBPtr);
1457  }
1458 
1459  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1460 
1461  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1462  << "(Induction step: " << StrideAPtr << ")\n");
1463  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1464  << *InstMap[BIdx] << ": " << *Dist << "\n");
1465 
1466  // Need accesses with constant stride. We don't want to vectorize
1467  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1468  // the address space.
1469  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1470  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1471  return Dependence::Unknown;
1472  }
1473 
1474  Type *ATy = APtr->getType()->getPointerElementType();
1475  Type *BTy = BPtr->getType()->getPointerElementType();
1476  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1477  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1478  uint64_t Stride = std::abs(StrideAPtr);
1479  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1480  if (!C) {
1481  if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1482  isSafeDependenceDistance(DL, *(PSE.getSE()),
1483  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1484  TypeByteSize))
1485  return Dependence::NoDep;
1486 
1487  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1488  ShouldRetryWithRuntimeCheck = true;
1489  return Dependence::Unknown;
1490  }
1491 
1492  const APInt &Val = C->getAPInt();
1493  int64_t Distance = Val.getSExtValue();
1494 
1495  // Attempt to prove strided accesses independent.
1496  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1497  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1498  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1499  return Dependence::NoDep;
1500  }
1501 
1502  // Negative distances are not plausible dependencies.
1503  if (Val.isNegative()) {
1504  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1505  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1506  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1507  ATy != BTy)) {
1508  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1509  return Dependence::ForwardButPreventsForwarding;
1510  }
1511 
1512  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1513  return Dependence::Forward;
1514  }
1515 
1516  // Write to the same location with the same size.
1517  // Could be improved to assert type sizes are the same (i32 == float, etc).
1518  if (Val == 0) {
1519  if (ATy == BTy)
1520  return Dependence::Forward;
1521  LLVM_DEBUG(
1522  dbgs() << "LAA: Zero dependence difference but different types\n");
1523  return Dependence::Unknown;
1524  }
1525 
1526  assert(Val.isStrictlyPositive() && "Expect a positive value");
1527 
1528  if (ATy != BTy) {
1529  LLVM_DEBUG(
1530  dbgs()
1531  << "LAA: ReadWrite-Write positive dependency with different types\n");
1532  return Dependence::Unknown;
1533  }
1534 
1535  // Bail out early if passed-in parameters make vectorization not feasible.
1536  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1537  VectorizerParams::VectorizationFactor : 1);
1538  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1539  VectorizerParams::VectorizationInterleave : 1);
1540  // The minimum number of iterations for a vectorized/unrolled version.
1541  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1542 
1543  // It's not vectorizable if the distance is smaller than the minimum distance
1544  // needed for a vectorized/unrolled version. Vectorizing one iteration in
1545  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1546  // TypeByteSize (no need to add the last gap distance).
1547  //
1548  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1549  // foo(int *A) {
1550  // int *B = (int *)((char *)A + 14);
1551  // for (i = 0 ; i < 1024 ; i += 2)
1552  // B[i] = A[i] + 1;
1553  // }
1554  //
1555  // Two accesses in memory (stride is 2):
1556  // | A[0] | | A[2] | | A[4] | | A[6] | |
1557  // | B[0] | | B[2] | | B[4] |
1558  //
1559  // Distance needed for vectorizing iterations except the last iteration:
1560  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1561  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1562  //
1563  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1564  // 12, which is less than the distance (14).
1565  //
1566  // If MinNumIter is 4 (say, if a user forces the vectorization factor to be 4),
1567  // the minimum distance needed is 28, which is greater than the distance. It is
1568  // not safe to vectorize.
1569  uint64_t MinDistanceNeeded =
1570  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1571  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1572  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1573  << Distance << '\n');
1574  return Dependence::Backward;
1575  }
1576 
1577  // Unsafe if the minimum distance needed is greater than max safe distance.
1578  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1579  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1580  << MinDistanceNeeded << " size in bytes");
1581  return Dependence::Backward;
1582  }
1583 
1584  // Positive distance bigger than max vectorization factor.
1585  // FIXME: Should use max factor instead of max distance in bytes, which
1586  // cannot handle different types.
1587  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1588  // void foo (int *A, char *B) {
1589  // for (unsigned i = 0; i < 1024; i++) {
1590  // A[i+2] = A[i] + 1;
1591  // B[i+2] = B[i] + 1;
1592  // }
1593  // }
1594  //
1595  // This case is currently unsafe according to the max safe distance. If we
1596  // analyze the two accesses on array B, the max safe dependence distance
1597  // is 2. Then we analyze the accesses on array A, the minimum distance needed
1598  // is 8, which exceeds the max safe distance of 2 and so forbids vectorization.
1599  // But actually both A and B could be vectorized by 2 iterations.
1600  MaxSafeDepDistBytes =
1601  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1602 
1603  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1604  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1605  couldPreventStoreLoadForward(Distance, TypeByteSize))
1606  return Dependence::BackwardVectorizableButPreventsForwarding;
1607 
1608  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1609  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1610  << " with max VF = " << MaxVF << '\n');
1611  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1612  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1613  return Dependence::BackwardVectorizable;
1614 }
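The distance arithmetic above can be sanity-checked against the worked example in the comments (4-byte ints, stride 2, B placed 14 bytes after A). The snippet below is an illustration of that formula only, not part of the analysis; all names in it are made up for the example.

  #include <cstdint>
  #include <cstdio>
  #include <initializer_list>

  int main() {
    // Values from the example above: int elements (4 bytes), stride 2,
    // and a byte distance of 14 between the two accesses.
    const uint64_t TypeByteSize = 4, Stride = 2, Distance = 14;
    for (uint64_t MinNumIter : {2, 4}) {
      // Same formula as MinDistanceNeeded in the function above.
      uint64_t MinDistanceNeeded =
          TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
      std::printf("MinNumIter=%llu -> needs %llu bytes -> %s\n",
                  (unsigned long long)MinNumIter,
                  (unsigned long long)MinDistanceNeeded,
                  MinDistanceNeeded <= Distance ? "vectorizable" : "unsafe");
    }
    return 0;
  }

With MinNumIter = 2 the required distance is 12 (vectorizable); with MinNumIter = 4 it is 28 (unsafe), matching the commentary.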
1615 
1616 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1617  MemAccessInfoList &CheckDeps,
1618  const ValueToValueMap &Strides) {
1619 
1620  MaxSafeDepDistBytes = -1;
1621  SmallPtrSet<MemAccessInfo, 8> Visited;
1622  for (MemAccessInfo CurAccess : CheckDeps) {
1623  if (Visited.count(CurAccess))
1624  continue;
1625 
1626  // Get the relevant memory access set.
1627  EquivalenceClasses<MemAccessInfo>::iterator I =
1628  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1629 
1630  // Check accesses within this set.
1631  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1632  AccessSets.member_begin(I);
1633  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1634  AccessSets.member_end();
1635 
1636  // Check every access pair.
1637  while (AI != AE) {
1638  Visited.insert(*AI);
1639  EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
1640  while (OI != AE) {
1641  // Check every accessing instruction pair in program order.
1642  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1643  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1644  for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1645  I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1646  auto A = std::make_pair(&*AI, *I1);
1647  auto B = std::make_pair(&*OI, *I2);
1648 
1649  assert(*I1 != *I2);
1650  if (*I1 > *I2)
1651  std::swap(A, B);
1652 
1653  Dependence::DepType Type =
1654  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1655  SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1656 
1657  // Gather dependences unless we accumulated MaxDependences
1658  // dependences. In that case return as soon as we find the first
1659  // unsafe dependence. This puts a limit on this quadratic
1660  // algorithm.
1661  if (RecordDependences) {
1662  if (Type != Dependence::NoDep)
1663  Dependences.push_back(Dependence(A.second, B.second, Type));
1664 
1665  if (Dependences.size() >= MaxDependences) {
1666  RecordDependences = false;
1667  Dependences.clear();
1668  LLVM_DEBUG(dbgs()
1669  << "Too many dependences, stopped recording\n");
1670  }
1671  }
1672  if (!RecordDependences && !SafeForVectorization)
1673  return false;
1674  }
1675  ++OI;
1676  }
1677  AI++;
1678  }
1679  }
1680 
1681  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1682  return SafeForVectorization;
1683 }
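As a rough illustration of the kind of pair this checker classifies (an example loop only, not taken from this file): with 4-byte ints, the store A[i + 4] and the load A[i] are a constant 16 bytes apart, so the dependence is backward but still vectorizable up to a factor of 4 elements.

  // Example only; the constant distance of 16 bytes bounds the safe
  // vectorization factor at 4 for 4-byte elements.
  void shiftByFour(int *A, int N) {
    for (int i = 0; i + 4 < N; ++i)
      A[i + 4] = A[i] + 1;
  }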
1684 
1685 SmallVector<Instruction *, 4>
1686 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1687  MemAccessInfo Access(Ptr, isWrite);
1688  auto &IndexVector = Accesses.find(Access)->second;
1689 
1690  SmallVector<Instruction *, 4> Insts;
1691  transform(IndexVector,
1692  std::back_inserter(Insts),
1693  [&](unsigned Idx) { return this->InstMap[Idx]; });
1694  return Insts;
1695 }
1696 
1697 const char *MemoryDepChecker::Dependence::DepName[] = {
1698  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1699  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1700 
1701 void MemoryDepChecker::Dependence::print(
1702  raw_ostream &OS, unsigned Depth,
1703  const SmallVectorImpl<Instruction *> &Instrs) const {
1704  OS.indent(Depth) << DepName[Type] << ":\n";
1705  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1706  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1707 }
1708 
1709 bool LoopAccessInfo::canAnalyzeLoop() {
1710  // We need to have a loop header.
1711  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1712  << TheLoop->getHeader()->getParent()->getName() << ": "
1713  << TheLoop->getHeader()->getName() << '\n');
1714 
1715  // We can only analyze innermost loops.
1716  if (!TheLoop->empty()) {
1717  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1718  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1719  return false;
1720  }
1721 
1722  // We must have a single backedge.
1723  if (TheLoop->getNumBackEdges() != 1) {
1724  LLVM_DEBUG(
1725  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1726  recordAnalysis("CFGNotUnderstood")
1727  << "loop control flow is not understood by analyzer";
1728  return false;
1729  }
1730 
1731  // We must have a single exiting block.
1732  if (!TheLoop->getExitingBlock()) {
1733  LLVM_DEBUG(
1734  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1735  recordAnalysis("CFGNotUnderstood")
1736  << "loop control flow is not understood by analyzer";
1737  return false;
1738  }
1739 
1740  // We only handle bottom-tested loops, i.e. loops in which the condition is
1741  // checked at the end of each iteration. With that we can assume that all
1742  // instructions in the loop are executed the same number of times.
1743  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1744  LLVM_DEBUG(
1745  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1746  recordAnalysis("CFGNotUnderstood")
1747  << "loop control flow is not understood by analyzer";
1748  return false;
1749  }
1750 
1751  // ScalarEvolution needs to be able to find the exit count.
1752  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1753  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1754  recordAnalysis("CantComputeNumberOfIterations")
1755  << "could not determine number of loop iterations";
1756  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1757  return false;
1758  }
1759 
1760  return true;
1761 }
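Two example loop shapes (illustrative C++, not from this file) show what these checks accept and reject: the first is innermost and bottom-tested with a SCEV-computable trip count; the second has an early exit, so there is more than one exiting block and the exiting block is no longer the latch.

  void analyzable(int *A, const int *B, int N) {
    for (int i = 0; i < N; ++i)   // single backedge, exiting block == latch
      A[i] = B[i] + 1;
  }

  void rejected(int *A, const int *B, const int *C, int N) {
    for (int i = 0; i < N; ++i) {
      if (C[i])                   // early exit: getExitingBlock() is null
        return;
      A[i] = B[i] + 1;
    }
  }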
1762 
1763 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1764  const TargetLibraryInfo *TLI,
1765  DominatorTree *DT) {
1766  typedef SmallPtrSet<Value*, 16> ValueSet;
1767 
1768  // Holds the Load and Store instructions.
1769  SmallVector<LoadInst *, 16> Loads;
1770  SmallVector<StoreInst *, 16> Stores;
1771 
1772  // Holds all the different accesses in the loop.
1773  unsigned NumReads = 0;
1774  unsigned NumReadWrites = 0;
1775 
1776  PtrRtChecking->Pointers.clear();
1777  PtrRtChecking->Need = false;
1778 
1779  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1780 
1781  // For each block.
1782  for (BasicBlock *BB : TheLoop->blocks()) {
1783  // Scan the BB and collect legal loads and stores.
1784  for (Instruction &I : *BB) {
1785  // If this is a load, save it. If this instruction can read from memory
1786  // but is not a load, then we quit. Notice that we don't handle function
1787  // calls that read or write.
1788  if (I.mayReadFromMemory()) {
1789  // Many math library functions read the rounding mode. We will only
1790  // vectorize a loop if it contains known function calls that don't set
1791  // the flag. Therefore, it is safe to ignore this read from memory.
1792  auto *Call = dyn_cast<CallInst>(&I);
1793  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1794  continue;
1795 
1796  // If the function has an explicit vectorized counterpart, we can safely
1797  // assume that it can be vectorized.
1798  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1799  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1800  continue;
1801 
1802  auto *Ld = dyn_cast<LoadInst>(&I);
1803  if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1804  recordAnalysis("NonSimpleLoad", Ld)
1805  << "read with atomic ordering or volatile read";
1806  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1807  CanVecMem = false;
1808  return;
1809  }
1810  NumLoads++;
1811  Loads.push_back(Ld);
1812  DepChecker->addAccess(Ld);
1813  if (EnableMemAccessVersioning)
1814  collectStridedAccess(Ld);
1815  continue;
1816  }
1817 
1818  // Save 'store' instructions. Abort if other instructions write to memory.
1819  if (I.mayWriteToMemory()) {
1820  auto *St = dyn_cast<StoreInst>(&I);
1821  if (!St) {
1822  recordAnalysis("CantVectorizeInstruction", St)
1823  << "instruction cannot be vectorized";
1824  CanVecMem = false;
1825  return;
1826  }
1827  if (!St->isSimple() && !IsAnnotatedParallel) {
1828  recordAnalysis("NonSimpleStore", St)
1829  << "write with atomic ordering or volatile write";
1830  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1831  CanVecMem = false;
1832  return;
1833  }
1834  NumStores++;
1835  Stores.push_back(St);
1836  DepChecker->addAccess(St);
1837  if (EnableMemAccessVersioning)
1838  collectStridedAccess(St);
1839  }
1840  } // Next instr.
1841  } // Next block.
1842 
1843  // Now we have two lists that hold the loads and the stores.
1844  // Next, we find the pointers that they use.
1845 
1846  // Check if we see any stores. If there are no stores, then we don't
1847  // care if the pointers are *restrict*.
1848  if (!Stores.size()) {
1849  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1850  CanVecMem = true;
1851  return;
1852  }
1853 
1854  MemoryDepChecker::DepCandidates DependentAccesses;
1855  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1856  TheLoop, AA, LI, DependentAccesses, *PSE);
1857 
1858  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1859  // multiple times on the same object. If the ptr is accessed twice, once
1860  // for read and once for write, it will only appear once (on the write
1861  // list). This is okay, since we are going to check for conflicts between
1862  // writes and between reads and writes, but not between reads and reads.
1863  ValueSet Seen;
1864 
1865  for (StoreInst *ST : Stores) {
1866  Value *Ptr = ST->getPointerOperand();
1867  // Check for store to loop invariant address.
1868  StoreToLoopInvariantAddress |= isUniform(Ptr);
1869  // If we did *not* see this pointer before, insert it to the read-write
1870  // list. At this phase it is only a 'write' list.
1871  if (Seen.insert(Ptr).second) {
1872  ++NumReadWrites;
1873 
1874  MemoryLocation Loc = MemoryLocation::get(ST);
1875  // The TBAA metadata could have a control dependency on the predication
1876  // condition, so we cannot rely on it when determining whether or not we
1877  // need runtime pointer checks.
1878  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1879  Loc.AATags.TBAA = nullptr;
1880 
1881  Accesses.addStore(Loc);
1882  }
1883  }
1884 
1885  if (IsAnnotatedParallel) {
1886  LLVM_DEBUG(
1887  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1888  << "checks.\n");
1889  CanVecMem = true;
1890  return;
1891  }
1892 
1893  for (LoadInst *LD : Loads) {
1894  Value *Ptr = LD->getPointerOperand();
1895  // If we did *not* see this pointer before, insert it to the
1896  // read list. If we *did* see it before, then it is already in
1897  // the read-write list. This allows us to vectorize expressions
1898  // such as A[i] += x, because the address of A[i] is a read-write
1899  // pointer. This only works if the index of A[i] is consecutive.
1900  // If the index is unknown (for example A[B[i]]) then we may
1901  // read a few words, modify, and write a few words, and some of the
1902  // words may be written to the same address.
1903  bool IsReadOnlyPtr = false;
1904  if (Seen.insert(Ptr).second ||
1905  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1906  ++NumReads;
1907  IsReadOnlyPtr = true;
1908  }
1909 
1910  MemoryLocation Loc = MemoryLocation::get(LD);
1911  // The TBAA metadata could have a control dependency on the predication
1912  // condition, so we cannot rely on it when determining whether or not we
1913  // need runtime pointer checks.
1914  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1915  Loc.AATags.TBAA = nullptr;
1916 
1917  Accesses.addLoad(Loc, IsReadOnlyPtr);
1918  }
1919 
1920  // If we write (or read-write) to a single destination and there are no
1921  // other reads in this loop then it is safe to vectorize.
1922  if (NumReadWrites == 1 && NumReads == 0) {
1923  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1924  CanVecMem = true;
1925  return;
1926  }
1927 
1928  // Build dependence sets and check whether we need a runtime pointer bounds
1929  // check.
1930  Accesses.buildDependenceSets();
1931 
1932  // Find pointers with computable bounds. We are going to use this information
1933  // to place a runtime bound check.
1934  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1935  TheLoop, SymbolicStrides);
1936  if (!CanDoRTIfNeeded) {
1937  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1938  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1939  << "the array bounds.\n");
1940  CanVecMem = false;
1941  return;
1942  }
1943 
1944  LLVM_DEBUG(
1945  dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1946 
1947  CanVecMem = true;
1948  if (Accesses.isDependencyCheckNeeded()) {
1949  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1950  CanVecMem = DepChecker->areDepsSafe(
1951  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1952  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1953 
1954  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1955  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1956 
1957  // Clear the dependency checks. We assume they are not needed.
1958  Accesses.resetDepChecks(*DepChecker);
1959 
1960  PtrRtChecking->reset();
1961  PtrRtChecking->Need = true;
1962 
1963  auto *SE = PSE->getSE();
1964  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1965  SymbolicStrides, true);
1966 
1967  // Check that we found the bounds for the pointer.
1968  if (!CanDoRTIfNeeded) {
1969  recordAnalysis("CantCheckMemDepsAtRunTime")
1970  << "cannot check memory dependencies at runtime";
1971  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1972  CanVecMem = false;
1973  return;
1974  }
1975 
1976  CanVecMem = true;
1977  }
1978  }
1979 
1980  if (CanVecMem)
1981  LLVM_DEBUG(
1982  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
1983  << (PtrRtChecking->Need ? "" : " don't")
1984  << " need runtime memory checks.\n");
1985  else {
1986  recordAnalysis("UnsafeMemDep")
1987  << "unsafe dependent memory operations in loop. Use "
1988  "#pragma loop distribute(enable) to allow loop distribution "
1989  "to attempt to isolate the offending operations into a separate "
1990  "loop";
1991  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
1992  }
1993 }
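A minimal consumer sketch, assuming the usual client pattern (the helper name is invented for illustration): once the analysis has run, a transformation queries canVectorizeMemory() and the Need flag of the runtime pointer checking to decide whether it must version the loop via addRuntimeChecks().

  #include "llvm/Analysis/LoopAccessAnalysis.h"
  using namespace llvm;

  // Illustrative helper, not part of this file: report whether the loop's
  // memory accesses are safe only under run-time pointer checks.
  static bool needsRuntimeChecks(const LoopAccessInfo &LAI) {
    if (!LAI.canVectorizeMemory())
      return false; // unsafe regardless of checks
    return LAI.getRuntimePointerChecking()->Need;
  }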
1994 
1995 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
1996  DominatorTree *DT) {
1997  assert(TheLoop->contains(BB) && "Unknown block used");
1998 
1999  // Blocks that do not dominate the latch need predication.
2000  BasicBlock* Latch = TheLoop->getLoopLatch();
2001  return !DT->dominates(BB, Latch);
2002 }
2003 
2004 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2005  Instruction *I) {
2006  assert(!Report && "Multiple reports generated");
2007 
2008  Value *CodeRegion = TheLoop->getHeader();
2009  DebugLoc DL = TheLoop->getStartLoc();
2010 
2011  if (I) {
2012  CodeRegion = I->getParent();
2013  // If there is no debug location attached to the instruction, fall back to
2014  // using the loop's.
2015  if (I->getDebugLoc())
2016  DL = I->getDebugLoc();
2017  }
2018 
2019  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2020  CodeRegion);
2021  return *Report;
2022 }
2023 
2024 bool LoopAccessInfo::isUniform(Value *V) const {
2025  auto *SE = PSE->getSE();
2026  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2027  // never considered uniform.
2028  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2029  // trivially loop-invariant FP values to be considered uniform.
2030  if (!SE->isSCEVable(V->getType()))
2031  return false;
2032  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2033 }
2034 
2035 // FIXME: this function is currently a duplicate of the one in
2036 // LoopVectorize.cpp.
2037 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2038  Instruction *Loc) {
2039  if (FirstInst)
2040  return FirstInst;
2041  if (Instruction *I = dyn_cast<Instruction>(V))
2042  return I->getParent() == Loc->getParent() ? I : nullptr;
2043  return nullptr;
2044 }
2045 
2046 namespace {
2047 
2048 /// IR Values for the lower and upper bounds of a pointer evolution. We
2049 /// need to use value-handles because SCEV expansion can invalidate previously
2050 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
2051 /// a previous one.
2052 struct PointerBounds {
2053  TrackingVH<Value> Start;
2054  TrackingVH<Value> End;
2055 };
2056 
2057 } // end anonymous namespace
2058 
2059 /// Expand code for the lower and upper bound of the pointer group \p CG
2060 /// in \p TheLoop. \return the values for the bounds.
2061 static PointerBounds
2062 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2063  Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2064  const RuntimePointerChecking &PtrRtChecking) {
2065  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2066  const SCEV *Sc = SE->getSCEV(Ptr);
2067 
2068  unsigned AS = Ptr->getType()->getPointerAddressSpace();
2069  LLVMContext &Ctx = Loc->getContext();
2070 
2071  // Use this type for pointer arithmetic.
2072  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2073 
2074  if (SE->isLoopInvariant(Sc, TheLoop)) {
2075  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
2076  << *Ptr << "\n");
2077  // Ptr could be in the loop body. If so, expand a new one at the correct
2078  // location.
2079  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2080  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2081  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2082  : Ptr;
2083  // We must return a half-open range, which means incrementing Sc.
2084  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2085  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2086  return {NewPtr, NewPtrPlusOne};
2087  } else {
2088  Value *Start = nullptr, *End = nullptr;
2089  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2090  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2091  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2092  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
2093  << "\n");
2094  return {Start, End};
2095  }
2096 }
2097 
2098 /// Turns a collection of checks into a collection of expanded upper and
2099 /// lower bounds for both pointers in the check.
2100 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
2101 expandBounds(const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2102  Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2103  const RuntimePointerChecking &PtrRtChecking) {
2104  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2105 
2106  // Here we're relying on the SCEV Expander's cache to only emit code for the
2107  // same bounds once.
2108  transform(
2109  PointerChecks, std::back_inserter(ChecksWithBounds),
2110  [&](const RuntimePointerChecking::PointerCheck &Check) {
2111  PointerBounds
2112  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2113  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2114  return std::make_pair(First, Second);
2115  });
2116 
2117  return ChecksWithBounds;
2118 }
2119 
2120 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2121  Instruction *Loc,
2122  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2123  const {
2124  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2125  auto *SE = PSE->getSE();
2126  SCEVExpander Exp(*SE, DL, "induction");
2127  auto ExpandedChecks =
2128  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2129 
2130  LLVMContext &Ctx = Loc->getContext();
2131  Instruction *FirstInst = nullptr;
2132  IRBuilder<> ChkBuilder(Loc);
2133  // Our instructions might fold to a constant.
2134  Value *MemoryRuntimeCheck = nullptr;
2135 
2136  for (const auto &Check : ExpandedChecks) {
2137  const PointerBounds &A = Check.first, &B = Check.second;
2138  // Check if two pointers (A and B) conflict where conflict is computed as:
2139  // start(A) <= end(B) && start(B) <= end(A)
2140  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2141  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2142 
2143  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2144  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2145  "Trying to bounds check pointers with different address spaces");
2146 
2147  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2148  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2149 
2150  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2151  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2152  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2153  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2154 
2155  // [A|B].Start points to the first accessed byte under base [A|B].
2156  // [A|B].End points to the last accessed byte, plus one.
2157  // There is no conflict when the intervals are disjoint:
2158  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2159  //
2160  // bound0 = (B.Start < A.End)
2161  // bound1 = (A.Start < B.End)
2162  // IsConflict = bound0 & bound1
2163  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2164  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2165  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2166  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2167  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2168  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2169  if (MemoryRuntimeCheck) {
2170  IsConflict =
2171  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2172  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2173  }
2174  MemoryRuntimeCheck = IsConflict;
2175  }
2176 
2177  if (!MemoryRuntimeCheck)
2178  return std::make_pair(nullptr, nullptr);
2179 
2180  // We have to do this trickery because the IRBuilder might fold the check to a
2181  // constant expression, in which case there is no Instruction anchored in
2182  // the block.
2183  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2184  ConstantInt::getTrue(Ctx));
2185  ChkBuilder.Insert(Check, "memcheck.conflict");
2186  FirstInst = getFirstInst(FirstInst, Check, Loc);
2187  return std::make_pair(FirstInst, Check);
2188 }
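The scalar logic behind the emitted comparisons, written out as plain C++ for illustration (the real code operates on SCEV-expanded bounds cast to i8*): two half-open byte ranges conflict exactly when each one starts before the other ends.

  #include <cstdint>

  // Mirrors the bound0 & bound1 -> found.conflict computation above.
  static bool rangesConflict(uint64_t AStart, uint64_t AEnd,
                             uint64_t BStart, uint64_t BEnd) {
    // No conflict when the half-open ranges [AStart, AEnd) and
    // [BStart, BEnd) are disjoint; conflict otherwise.
    return AStart < BEnd && BStart < AEnd;
  }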
2189 
2190 std::pair<Instruction *, Instruction *>
2191 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2192  if (!PtrRtChecking->Need)
2193  return std::make_pair(nullptr, nullptr);
2194 
2195  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2196 }
2197 
2198 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2199  Value *Ptr = nullptr;
2200  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2201  Ptr = LI->getPointerOperand();
2202  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2203  Ptr = SI->getPointerOperand();
2204  else
2205  return;
2206 
2207  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2208  if (!Stride)
2209  return;
2210 
2211  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2212  "versioning:");
2213  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2214 
2215  // Avoid adding the "Stride == 1" predicate when we know that
2216  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2217  // or zero iteration loop, as Trip-Count <= Stride == 1.
2218  //
2219  // TODO: We are currently not making a very informed decision on when it is
2220  // beneficial to apply stride versioning. It might make more sense that the
2221  // users of this analysis (such as the vectorizer) will trigger it, based on
2222  // their specific cost considerations; For example, in cases where stride
2223  // versioning does not help resolving memory accesses/dependences, the
2224  // vectorizer should evaluate the cost of the runtime test, and the benefit
2225  // of various possible stride specializations, considering the alternatives
2226  // of using gather/scatters (if available).
2227 
2228  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2229  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2230 
2231  // Match the types so we can compare the stride and the BETakenCount.
2232  // The Stride can be positive/negative, so we sign extend Stride;
2233  // The backedge-taken count is non-negative, so we zero extend BETakenCount.
2234  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2235  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2236  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2237  const SCEV *CastedStride = StrideExpr;
2238  const SCEV *CastedBECount = BETakenCount;
2239  ScalarEvolution *SE = PSE->getSE();
2240  if (BETypeSize >= StrideTypeSize)
2241  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2242  else
2243  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2244  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2245  // Since TripCount == BackEdgeTakenCount + 1, checking:
2246  // "Stride >= TripCount" is equivalent to checking:
2247  // Stride - BETakenCount > 0
2248  if (SE->isKnownPositive(StrideMinusBETaken)) {
2249  LLVM_DEBUG(
2250  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2251  "Stride==1 predicate will imply that the loop executes "
2252  "at most once.\n");
2253  return;
2254  }
2255  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2256 
2257  SymbolicStrides[Ptr] = Stride;
2258  StrideSet.insert(Stride);
2259 }
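The bail-out above rests on a small identity: with BTC the backedge-taken count, TripCount == BTC + 1, so "Stride - BTC > 0" is the same as "Stride >= TripCount". A sketch of that arithmetic with example values only:

  #include <cstdint>
  #include <cassert>

  // If the symbolic stride is at least the trip count, a "Stride == 1"
  // predicate would restrict the loop to at most one iteration, so there
  // is no point in versioning on it.
  static bool strideVersioningIsPointless(int64_t Stride, int64_t BTC) {
    return Stride - BTC > 0; // equivalent to Stride >= BTC + 1 == TripCount
  }

  int main() {
    assert(strideVersioningIsPointless(8, 7));   // Stride 8, TripCount 8
    assert(!strideVersioningIsPointless(4, 7));  // Stride 4, TripCount 8
    return 0;
  }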
2260 
2261 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2262  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2263  DominatorTree *DT, LoopInfo *LI)
2264  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2265  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2266  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2267  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2268  StoreToLoopInvariantAddress(false) {
2269  if (canAnalyzeLoop())
2270  analyzeLoop(AA, LI, TLI, DT);
2271 }
2272 
2273 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2274  if (CanVecMem) {
2275  OS.indent(Depth) << "Memory dependences are safe";
2276  if (MaxSafeDepDistBytes != -1ULL)
2277  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2278  << " bytes";
2279  if (PtrRtChecking->Need)
2280  OS << " with run-time checks";
2281  OS << "\n";
2282  }
2283 
2284  if (Report)
2285  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2286 
2287  if (auto *Dependences = DepChecker->getDependences()) {
2288  OS.indent(Depth) << "Dependences:\n";
2289  for (auto &Dep : *Dependences) {
2290  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2291  OS << "\n";
2292  }
2293  } else
2294  OS.indent(Depth) << "Too many dependences, not recorded\n";
2295 
2296  // List the pairs of accesses that need run-time checks to prove independence.
2297  PtrRtChecking->print(OS, Depth);
2298  OS << "\n";
2299 
2300  OS.indent(Depth) << "Store to invariant address was "
2301  << (StoreToLoopInvariantAddress ? "" : "not ")
2302  << "found in loop.\n";
2303 
2304  OS.indent(Depth) << "SCEV assumptions:\n";
2305  PSE->getUnionPredicate().print(OS, Depth);
2306 
2307  OS << "\n";
2308 
2309  OS.indent(Depth) << "Expressions re-written:\n";
2310  PSE->print(OS, Depth);
2311 }
2312 
2313 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2314  auto &LAI = LoopAccessInfoMap[L];
2315 
2316  if (!LAI)
2317  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2318 
2319  return *LAI.get();
2320 }
2321 
2322 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2323  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2324 
2325  for (Loop *TopLevelLoop : *LI)
2326  for (Loop *L : depth_first(TopLevelLoop)) {
2327  OS.indent(2) << L->getHeader()->getName() << ":\n";
2328  auto &LAI = LAA.getInfo(L);
2329  LAI.print(OS, 4);
2330  }
2331 }
2332 
2333 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2334  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2335  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2336  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2337  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2338  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2339  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2340 
2341  return false;
2342 }
2343 
2344 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2345  AU.addRequired<ScalarEvolutionWrapperPass>();
2346  AU.addRequired<AAResultsWrapperPass>();
2347  AU.addRequired<DominatorTreeWrapperPass>();
2348  AU.addRequired<LoopInfoWrapperPass>();
2349 
2350  AU.setPreservesAll();
2351 }
2352 
2353 char LoopAccessLegacyAnalysis::ID = 0;
2354 static const char laa_name[] = "Loop Access Analysis";
2355 #define LAA_NAME "loop-accesses"
2356 
2357 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2358 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2359 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2360 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2361 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2362 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2363 
2364 AnalysisKey LoopAccessAnalysis::Key;
2365 
2366 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2367  LoopStandardAnalysisResults &AR) {
2368  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2369 }
2370 
2371 namespace llvm {
2372 
2373  Pass *createLAAPass() {
2374  return new LoopAccessLegacyAnalysis();
2375  }
2376 
2377 } // end namespace llvm