//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The implementation for the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///       A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI != PtrToStride.end()) {
    Value *StrideVal = SI->second;

    // Strip casts.
    StrideVal = stripIntegerCast(StrideVal);

    ScalarEvolution *SE = PSE.getSE();
    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
    const auto *CT =
        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

    PSE.addPredicate(*SE->getEqualPredicate(U, CT));
    auto *Expr = PSE.getSCEV(Ptr);

    LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                      << " by: " << *Expr << "\n");
    return Expr;
  }

  // Otherwise, just return the SCEV of the original pointer.
  return OrigSCEV;
}

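// Illustrative example (an editorial sketch, not part of the original
// source): for a loop
//   for (i = 0; i < n; ++i) A[i * Stride] = 0;
// the pointer's SCEV is {%A,+,(4 * %Stride)}<%loop>. If the stride map has
// an entry for this pointer, the routine above adds the predicate
// "%Stride == 1" to PSE and re-queries the SCEV, which then folds to
// {%A,+,4}<%loop> - a unit-stride access the vectorizer can reason about.
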
/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp))
    ScStart = ScEnd = Sc;
  else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
    // Add the size of the pointed element to ScEnd.
    unsigned EltSize =
        Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
    const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
  }

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}

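// Worked example (an editorial sketch, not from the original source): for
// i32 accesses A[i] with Step = 4 and backedge-taken count N = 99, we get
//   ScStart = %A,  ScEnd = %A + 4*99 + 4 = %A + 400
// so [Start, End) covers exactly the 400 bytes the loop may touch. Two such
// intervals conflict unless one ends at or before the other begins.
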
SmallVector<RuntimePointerChecking::PointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<PointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
                                           const CheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
  const SCEV *Start = RtCheck.Pointers[Index].Start;
  const SCEV *End = RtCheck.Pointers[Index].End;

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

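// Illustrative sketch (editorial, not from the original source): if a group
// currently spans [%A, %A+100) and we add a pointer with bounds
// [%A+40, %A+140), both differences are constant, so the group widens to
// [%A, %A+140). A pointer whose distance from %A is symbolic makes
// getMinFromExprs return nullptr and addPointer rejects it.
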
void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(CheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<CheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      unsigned Pointer = PositionMap[MI->getPointer()];
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (CheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable.  If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(CheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
                 LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride,
                            bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used.  Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// Go over all memory access and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  const DataLayout &DL;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks.  Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded.  When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
  /// while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for loop-invariant pointer is trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool NeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to access from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 1;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;

    // We assign consecutive id to access from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    for (auto A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
      MemAccessInfo Access(Ptr, IsWrite);

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;

      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
                                RunningDepId, ASId, ShouldCheckWrap, false)) {
        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
        Retries.push_back(Access);
        CanDoAliasSetRT = false;
      }
    }

    // If we have at least two writes or one write and a read then we need to
    // check them.  But there is no need for checks if there is only one
    // dependence set for this alias set.
    //
    // Note that this function computes CanDoRT and NeedRTCheck independently.
    // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
    // for which we couldn't find the bounds but we don't actually need to emit
    // any checks so it does not matter.
    bool NeedsAliasSetRTCheck = false;
    if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
      NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
                             (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries)
        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
                                  TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          break;
        }
    }

    CanDoRT &= CanDoAliasSetRT;
    NeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (NeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  RtCheck.Need = NeedRTCheck;

  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " ("
             << (A.getInt() ? "write"
                            : (ReadOnlyPtr.count(A.getPointer()) ? "read-only"
                                                                 : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of access to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts. If
          // this is a read only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<Value *, 16> ValueVector;
          ValueVector TempObjects;

          GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (Value *UnderlyingObj : TempObjects) {
            // Null pointers never alias; don't join sets for pointers that
            // have "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed.  It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

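// Illustrative IR sketch (editorial, not from the original source):
//   %add = add nsw i32 %i, 1
//   %gep = getelementptr inbounds i32, i32* %a, i32 %add
// Here %add is the single non-constant index. Since it is an NSW add with a
// constant second operand and %i is an NSW AddRec over the loop, the GEP is
// proven non-wrapping even when SCEV drops the flag on the pointer SCEV.
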
/// Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                           const Loop *Lp, const ValueToValueMap &StridesMap,
                           bool Assume, bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  auto *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
                      << *Ptr << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                      << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
                      << *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is a AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // An getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
      NullPointerIsDefined(Lp->getHeader()->getParent(),
                           PtrTy->getAddressSpace())) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
    } else {
      LLVM_DEBUG(
          dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
                      << " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
      (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
                                              PtrTy->getAddressSpace()))) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                        << "inbounds or in address space 0 may wrap:\n"
                        << "LAA:   Pointer: " << *Ptr << "\n"
                        << "LAA:   SCEV: " << *AR << "\n"
                        << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}

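// Worked example (editorial sketch, not from the original source): for
//   %gep = getelementptr inbounds i32, i32* %a, i64 %i   ; i += 2 each iter
// the pointer SCEV is {%a,+,8}. With Size = 4 bytes for i32, StepVal = 8
// yields Stride = 2 and Rem = 0, so getPtrStride returns 2. A byte step of 6
// would leave Rem != 0 and the function would return 0 instead.
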
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
                           ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
  OffValPairs.reserve(VL.size());

  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];
  const SCEV *Scev0 = SE.getSCEV(Ptr0);
  Value *Obj0 = GetUnderlyingObject(Ptr0, DL);

  llvm::SmallSet<int64_t, 4> Offsets;
  for (auto *Ptr : VL) {
    // TODO: Outline this code as a special, more time consuming, version of
    // computeConstantDifference() function.
    if (Ptr->getType()->getPointerAddressSpace() !=
        Ptr0->getType()->getPointerAddressSpace())
      return false;
    // If a pointer refers to a different underlying object, bail - the
    // pointers are by definition incomparable.
    Value *CurrObj = GetUnderlyingObject(Ptr, DL);
    if (CurrObj != Obj0)
      return false;

    const SCEV *Scev = SE.getSCEV(Ptr);
    const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
    // The pointers may not have a constant offset from each other, or SCEV
    // may just not be smart enough to figure out they do. Regardless,
    // there's nothing we can do.
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = Diff->getAPInt().getSExtValue();
    if (!Offsets.insert(Offset).second)
      return false;
    OffValPairs.emplace_back(Offset, Ptr);
  }
  SortedIndices.clear();
  SortedIndices.resize(VL.size());
  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);

  // Sort the memory accesses and keep the order of their uses in UseOrder.
  std::stable_sort(SortedIndices.begin(), SortedIndices.end(),
                   [&OffValPairs](unsigned Left, unsigned Right) {
                     return OffValPairs[Left].first < OffValPairs[Right].first;
                   });

  // Check if the order is consecutive already.
  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
        return I == SortedIndices[I];
      }))
    SortedIndices.clear();

  return true;
}

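// Usage sketch (editorial, not from the original source): for pointers
// {%p+8, %p, %p+4} with a common underlying object, the constant offsets
// from the first pointer are {0, -8, -4}, so SortedIndices becomes {1, 2, 0}
// (ascending offsets). If the input were already in ascending order,
// SortedIndices would be cleared to signal that no reordering is needed.
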
/// Take the address space operand from the Load/Store instruction.
/// Returns -1 if this is not a valid Load/Store instruction.
static unsigned getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I))
    return S->getPointerAddressSpace();
  return -1;
}

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB))
    return false;

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return false;

  // Make sure that A and B have the same type if required.
  if (CheckType && PtrA->getType() != PtrB->getType())
    return false;

  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));

  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  //  OffsetDelta = OffsetB - OffsetA;
  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
  if (PtrA == PtrB)
    return OffsetDelta == Size;

  // Compute the necessary base pointer delta to have the necessary final delta
  // equal to the size.
  // BaseDelta = Size - OffsetDelta;
  const SCEV *SizeSCEV = SE.getConstant(Size);
  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);

  // Otherwise compute the distance with SCEV between the base pointers.
  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
  return X == PtrSCEVB;
}

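// Worked example (editorial sketch, not from the original source):
//   %x = load i32, i32* %p
//   %q = getelementptr inbounds i32, i32* %p, i64 1
//   %y = load i32, i32* %q
// After stripping constant offsets both pointers are based on %p with
// OffsetDelta = 4, which equals the 4-byte store size of i32, so the two
// loads are consecutive.
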
bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return true;

  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iterations between the store and the load is
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >>= 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

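// Worked example (editorial sketch, not from the original source): with
// i32 accesses (TypeByteSize = 4) and Distance = 12 bytes, the threshold is
// 8 * 4 = 32 iterations. At VF = 8 bytes, 12 % 8 != 0 and 12 / 8 = 1 < 32,
// so the maximum safe VF drops to 4 bytes, which is below 2 * TypeByteSize;
// the function reports a potential store-to-load forwarding conflict.
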
/// Given a non-constant (unknown) dependence-distance \p Dist between two
/// memory accesses, that have the same stride whose absolute value is given
/// in \p Stride, and that have the same type size \p TypeByteSize,
/// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
/// possible to prove statically that the dependence distance is larger
/// than the range that the accesses will travel through the execution of
/// the loop. If so, return true; false otherwise. This is useful for
/// example in loops such as the following (PR31098):
///     for (i = 0; i < D; ++i) {
///                = out[i];
///       out[i+D] =
///     }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &BackedgeTakenCount,
                                     const SCEV &Dist, uint64_t Stride,
                                     uint64_t TypeByteSize) {
  // If we can prove that
  //      (**) |Dist| > BackedgeTakenCount * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > BackedgeTakenCount).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; This is checked elsewhere.
  // But in some cases we can prune unknown dependence distances early, and
  // even before selecting the VF, and without a runtime test, by comparing
  // the distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  //
  const uint64_t ByteStride = Stride * TypeByteSize;
  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSize > ProductTypeSize)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is  Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  return false;
}

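// Worked instance (editorial sketch, not from the original source): in the
// PR31098 loop above, with i32 elements, Dist = 4*D bytes, Stride = 1,
// TypeByteSize = 4 and BackedgeTakenCount = D - 1, so
//   Dist - BackedgeTakenCount * Step = 4*D - (D - 1) * 4 = 4 > 0,
// which SCEV can prove positive; the accesses are therefore independent.
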
/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not multiple of type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

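// Numeric check (editorial, not from the original source): in the first
// example above ScaledDist % Stride = 2 % 4 = 2 != 0, and in the second
// 4 % 3 = 1 != 0, so both report independence; a scaled distance of 4 with
// stride 4 would give 0 and the accesses would be treated as dependent.
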
1424 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1425  const MemAccessInfo &B, unsigned BIdx,
1426  const ValueToValueMap &Strides) {
1427  assert (AIdx < BIdx && "Must pass arguments in program order");
1428 
1429  Value *APtr = A.getPointer();
1430  Value *BPtr = B.getPointer();
1431  bool AIsWrite = A.getInt();
1432  bool BIsWrite = B.getInt();
1433 
1434  // Two reads are independent.
1435  if (!AIsWrite && !BIsWrite)
1436  return Dependence::NoDep;
1437 
1438  // We cannot check pointers in different address spaces.
1439  if (APtr->getType()->getPointerAddressSpace() !=
1440  BPtr->getType()->getPointerAddressSpace())
1441  return Dependence::Unknown;
1442 
1443  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1444  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1445 
1446  const SCEV *Src = PSE.getSCEV(APtr);
1447  const SCEV *Sink = PSE.getSCEV(BPtr);
1448 
1449  // If the induction step is negative we have to invert source and sink of the
1450  // dependence.
1451  if (StrideAPtr < 0) {
1452  std::swap(APtr, BPtr);
1453  std::swap(Src, Sink);
1454  std::swap(AIsWrite, BIsWrite);
1455  std::swap(AIdx, BIdx);
1456  std::swap(StrideAPtr, StrideBPtr);
1457  }
1458 
1459  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1460 
1461  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1462  << "(Induction step: " << StrideAPtr << ")\n");
1463  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1464  << *InstMap[BIdx] << ": " << *Dist << "\n");
1465 
1466  // Need accesses with constant stride. We don't want to vectorize
1467  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1468  // the address space.
1469  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1470  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1471  return Dependence::Unknown;
1472  }
1473 
1474  Type *ATy = APtr->getType()->getPointerElementType();
1475  Type *BTy = BPtr->getType()->getPointerElementType();
1476  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1477  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1478  uint64_t Stride = std::abs(StrideAPtr);
1479  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1480  if (!C) {
1481  if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1482  isSafeDependenceDistance(DL, *(PSE.getSE()),
1483  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1484  TypeByteSize))
1485  return Dependence::NoDep;
1486 
1487  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1488  ShouldRetryWithRuntimeCheck = true;
1489  return Dependence::Unknown;
1490  }
1491 
1492  const APInt &Val = C->getAPInt();
1493  int64_t Distance = Val.getSExtValue();
1494 
1495  // Attempt to prove strided accesses independent.
1496  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1497  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1498  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1499  return Dependence::NoDep;
1500  }
1501 
1502  // Negative distances are not plausible dependencies.
1503  if (Val.isNegative()) {
1504  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1505  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1506  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1507  ATy != BTy)) {
1508  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1509  return Dependence::ForwardButPreventsForwarding;
1510  }
1511 
1512  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1513  return Dependence::Forward;
1514  }
1515 
1516  // Write to the same location with the same size.
1517  // Could be improved to assert type sizes are the same (i32 == float, etc).
1518  if (Val == 0) {
1519  if (ATy == BTy)
1520  return Dependence::Forward;
1521  LLVM_DEBUG(
1522  dbgs() << "LAA: Zero dependence difference but different types\n");
1523  return Dependence::Unknown;
1524  }
1525 
1526  assert(Val.isStrictlyPositive() && "Expect a positive value");
1527 
1528  if (ATy != BTy) {
1529  LLVM_DEBUG(
1530  dbgs()
1531  << "LAA: ReadWrite-Write positive dependency with different types\n");
1532  return Dependence::Unknown;
1533  }
1534 
1535  // Bail out early if passed-in parameters make vectorization not feasible.
1536  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1538  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1540  // The minimum number of iterations for a vectorized/unrolled version.
1541  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1542 
1543  // It's not vectorizable if the distance is smaller than the minimum distance
1544  // needed for a vectroized/unrolled version. Vectorizing one iteration in
1545  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1546  // TypeByteSize (No need to plus the last gap distance).
1547  //
1548  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1549  // foo(int *A) {
1550  // int *B = (int *)((char *)A + 14);
1551  // for (i = 0 ; i < 1024 ; i += 2)
1552  // B[i] = A[i] + 1;
1553  // }
1554  //
1555  // Two accesses in memory (stride is 2):
1556  // | A[0] | | A[2] | | A[4] | | A[6] | |
1557  // | B[0] | | B[2] | | B[4] |
1558  //
1559  // Distance needed to vectorize every iteration except the last:
1560  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1561  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1562  //
1563  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1564  // 12, which is less than the distance of 14.
1565  //
1566  // If MinNumIter is 4 (say a user forces the vectorization factor to be 4),
1567  // the minimum distance needed is 28, which is greater than the distance of
1568  // 14. It is not safe to vectorize in that case.
1569  uint64_t MinDistanceNeeded =
1570  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1571  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1572  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1573  << Distance << '\n');
1574  return Dependence::Backward;
1575  }
1576 
1577  // Unsafe if the minimum distance needed is greater than max safe distance.
1578  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1579  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1580  << MinDistanceNeeded << " size in bytes");
1581  return Dependence::Backward;
1582  }
1583 
1584  // Positive distance bigger than max vectorization factor.
1585  // FIXME: Should use the max factor instead of the max distance in bytes,
1586  // which cannot handle different types.
1587  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1588  // void foo (int *A, char *B) {
1589  // for (unsigned i = 0; i < 1024; i++) {
1590  // A[i+2] = A[i] + 1;
1591  // B[i+2] = B[i] + 1;
1592  // }
1593  // }
1594  //
1595  // This case is currently unsafe according to the max safe distance. If we
1596  // analyze the two accesses on array B, the max safe dependence distance
1597  // becomes 2. Then when we analyze the accesses on array A, the minimum
1598  // distance needed is 8, which is greater than 2, so vectorization is
1599  // forbidden. But actually both A and B could be vectorized by 2 iterations.
1600  MaxSafeDepDistBytes =
1601  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1602 
1603  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1604  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1605  couldPreventStoreLoadForward(Distance, TypeByteSize))
1606  return Dependence::BackwardVectorizableButPreventsForwarding;
1607 
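  // Worked instance of the computation below (illustrative numbers only):
  // with MaxSafeDepDistBytes = 32, i32 elements (TypeByteSize = 4) and
  // Stride = 1, MaxVF = 32 / (4 * 1) = 8 lanes, so MaxVFInBits =
  // 8 * 4 * 8 = 256 bits of safe register width.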
1608  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1609  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1610  << " with max VF = " << MaxVF << '\n');
1611  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1612  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1613  return Dependence::BackwardVectorizable;
1614 }
1615 
1616 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1617  MemAccessInfoList &CheckDeps,
1618  const ValueToValueMap &Strides) {
1619 
1620  MaxSafeDepDistBytes = -1;
1621  SmallPtrSet<MemAccessInfo, 8> Visited;
1622  for (MemAccessInfo CurAccess : CheckDeps) {
1623  if (Visited.count(CurAccess))
1624  continue;
1625 
1626  // Get the relevant memory access set.
1627  EquivalenceClasses<MemAccessInfo>::iterator I =
1628  AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1629 
1630  // Check accesses within this set.
1631  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1632  AccessSets.member_begin(I);
1633  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1634  AccessSets.member_end();
1635 
1636  // Check every access pair.
1637  while (AI != AE) {
1638  Visited.insert(*AI);
1639  EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
1640  while (OI != AE) {
1641  // Check every accessing instruction pair in program order.
1642  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1643  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1644  for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
1645  I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
1646  auto A = std::make_pair(&*AI, *I1);
1647  auto B = std::make_pair(&*OI, *I2);
1648 
1649  assert(*I1 != *I2);
1650  if (*I1 > *I2)
1651  std::swap(A, B);
1652 
1653  Dependence::DepType Type =
1654  isDependent(*A.first, A.second, *B.first, B.second, Strides);
1655  SafeForVectorization &= Dependence::isSafeForVectorization(Type);
1656 
1657  // Gather dependences unless we accumulated MaxDependences
1658  // dependences. In that case return as soon as we find the first
1659  // unsafe dependence. This puts a limit on this quadratic
1660  // algorithm.
1661  if (RecordDependences) {
1662  if (Type != Dependence::NoDep)
1663  Dependences.push_back(Dependence(A.second, B.second, Type));
1664 
1665  if (Dependences.size() >= MaxDependences) {
1666  RecordDependences = false;
1667  Dependences.clear();
1668  LLVM_DEBUG(dbgs()
1669  << "Too many dependences, stopped recording\n");
1670  }
1671  }
1672  if (!RecordDependences && !SafeForVectorization)
1673  return false;
1674  }
1675  ++OI;
1676  }
1677  AI++;
1678  }
1679  }
1680 
1681  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1682  return SafeForVectorization;
1683 }
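// A note on the cost of the pairwise walk above (illustrative arithmetic):
// an equivalence class with k accesses yields k * (k - 1) / 2 pairs, so 16
// accesses to one underlying object already trigger 120 isDependent queries;
// MaxDependences only caps how many of the resulting dependences are kept.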
1684 
1685 SmallVector<Instruction *, 4>
1686 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1687  MemAccessInfo Access(Ptr, isWrite);
1688  auto &IndexVector = Accesses.find(Access)->second;
1689 
1690  SmallVector<Instruction *, 4> Insts;
1691  transform(IndexVector,
1692  std::back_inserter(Insts),
1693  [&](unsigned Idx) { return this->InstMap[Idx]; });
1694  return Insts;
1695 }
1696 
1697 const char *MemoryDepChecker::Dependence::DepName[] = {
1698  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1699  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1700 
1701 void MemoryDepChecker::Dependence::print(
1702  raw_ostream &OS, unsigned Depth,
1703  const SmallVectorImpl<Instruction *> &Instrs) const {
1704  OS.indent(Depth) << DepName[Type] << ":\n";
1705  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1706  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1707 }
1708 
1709 bool LoopAccessInfo::canAnalyzeLoop() {
1710  // We need to have a loop header.
1711  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1712  << TheLoop->getHeader()->getParent()->getName() << ": "
1713  << TheLoop->getHeader()->getName() << '\n');
1714 
1715  // We can only analyze innermost loops.
1716  if (!TheLoop->empty()) {
1717  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1718  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1719  return false;
1720  }
1721 
1722  // We must have a single backedge.
1723  if (TheLoop->getNumBackEdges() != 1) {
1724  LLVM_DEBUG(
1725  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1726  recordAnalysis("CFGNotUnderstood")
1727  << "loop control flow is not understood by analyzer";
1728  return false;
1729  }
1730 
1731  // We must have a single exiting block.
1732  if (!TheLoop->getExitingBlock()) {
1733  LLVM_DEBUG(
1734  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1735  recordAnalysis("CFGNotUnderstood")
1736  << "loop control flow is not understood by analyzer";
1737  return false;
1738  }
1739 
1740  // We only handle bottom-tested loops, i.e. loops in which the condition is
1741  // checked at the end of each iteration. With that we can assume that all
1742  // instructions in the loop are executed the same number of times.
1743  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1744  LLVM_DEBUG(
1745  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1746  recordAnalysis("CFGNotUnderstood")
1747  << "loop control flow is not understood by analyzer";
1748  return false;
1749  }
1750 
1751  // ScalarEvolution needs to be able to find the exit count.
1752  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1753  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1754  recordAnalysis("CantComputeNumberOfIterations")
1755  << "could not determine number of loop iterations";
1756  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1757  return false;
1758  }
1759 
1760  return true;
1761 }
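// A loop shape that passes every check above (hypothetical example):
//   for (int i = 0; i < n; ++i)
//     A[i] = B[i] + 1;
// It is innermost, has one backedge, one exiting block that is also the
// latch, and a backedge-taken count that SCEV can compute.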
1762 
1763 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1764  const TargetLibraryInfo *TLI,
1765  DominatorTree *DT) {
1766  typedef SmallPtrSet<Value*, 16> ValueSet;
1767 
1768  // Holds the Load and Store instructions.
1769  SmallVector<LoadInst *, 16> Loads;
1770  SmallVector<StoreInst *, 16> Stores;
1771 
1772  // Holds all the different accesses in the loop.
1773  unsigned NumReads = 0;
1774  unsigned NumReadWrites = 0;
1775 
1776  PtrRtChecking->Pointers.clear();
1777  PtrRtChecking->Need = false;
1778 
1779  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1780 
1781  // For each block.
1782  for (BasicBlock *BB : TheLoop->blocks()) {
1783  // Scan the BB and collect legal loads and stores.
1784  for (Instruction &I : *BB) {
1785  // If this is a load, save it. If this instruction can read from memory
1786  // but is not a load, then we quit. Notice that we don't handle function
1787  // calls that read or write.
1788  if (I.mayReadFromMemory()) {
1789  // Many math library functions read the rounding mode. We will only
1790  // vectorize a loop if it contains known function calls that don't set
1791  // the flag. Therefore, it is safe to ignore this read from memory.
1792  auto *Call = dyn_cast<CallInst>(&I);
1793  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1794  continue;
1795 
1796  // If the function has an explicit vectorized counterpart, we can safely
1797  // assume that it can be vectorized.
1798  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1799  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1800  continue;
1801 
1802  auto *Ld = dyn_cast<LoadInst>(&I);
1803  if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1804  recordAnalysis("NonSimpleLoad", Ld)
1805  << "read with atomic ordering or volatile read";
1806  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1807  CanVecMem = false;
1808  return;
1809  }
1810  NumLoads++;
1811  Loads.push_back(Ld);
1812  DepChecker->addAccess(Ld);
1813  if (EnableMemAccessVersioning)
1814  collectStridedAccess(Ld);
1815  continue;
1816  }
1817 
1818  // Save 'store' instructions. Abort if other instructions write to memory.
1819  if (I.mayWriteToMemory()) {
1820  auto *St = dyn_cast<StoreInst>(&I);
1821  if (!St) {
1822  recordAnalysis("CantVectorizeInstruction", St)
1823  << "instruction cannot be vectorized";
1824  CanVecMem = false;
1825  return;
1826  }
1827  if (!St->isSimple() && !IsAnnotatedParallel) {
1828  recordAnalysis("NonSimpleStore", St)
1829  << "write with atomic ordering or volatile write";
1830  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1831  CanVecMem = false;
1832  return;
1833  }
1834  NumStores++;
1835  Stores.push_back(St);
1836  DepChecker->addAccess(St);
1837  if (EnableMemAccessVersioning)
1838  collectStridedAccess(St);
1839  }
1840  } // Next instr.
1841  } // Next block.
1842 
1843  // Now we have two lists that hold the loads and the stores.
1844  // Next, we find the pointers that they use.
1845 
1846  // Check if we see any stores. If there are no stores, then we don't
1847  // care if the pointers are *restrict*.
1848  if (!Stores.size()) {
1849  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1850  CanVecMem = true;
1851  return;
1852  }
1853 
1854  MemoryDepChecker::DepCandidates DependentAccesses;
1855  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1856  TheLoop, AA, LI, DependentAccesses, *PSE);
1857 
1858  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1859  // multiple times on the same object. If the ptr is accessed twice, once
1860  // for read and once for write, it will only appear once (on the write
1861  // list). This is okay, since we are going to check for conflicts between
1862  // writes and between reads and writes, but not between reads and reads.
1863  ValueSet Seen;
1864 
1865  // Record uniform store addresses to identify if we have multiple stores
1866  // to the same address.
1867  ValueSet UniformStores;
1868 
1869  for (StoreInst *ST : Stores) {
1870  Value *Ptr = ST->getPointerOperand();
1871 
1872  if (isUniform(Ptr))
1873  HasDependenceInvolvingLoopInvariantAddress |=
1874  !UniformStores.insert(Ptr).second;
1875 
1876  // If we did *not* see this pointer before, insert it to the read-write
1877  // list. At this phase it is only a 'write' list.
1878  if (Seen.insert(Ptr).second) {
1879  ++NumReadWrites;
1880 
1881  MemoryLocation Loc = MemoryLocation::get(ST);
1882  // The TBAA metadata could have a control dependency on the predication
1883  // condition, so we cannot rely on it when determining whether or not we
1884  // need runtime pointer checks.
1885  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1886  Loc.AATags.TBAA = nullptr;
1887 
1888  Accesses.addStore(Loc);
1889  }
1890  }
1891 
1892  if (IsAnnotatedParallel) {
1893  LLVM_DEBUG(
1894  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1895  << "checks.\n");
1896  CanVecMem = true;
1897  return;
1898  }
1899 
1900  for (LoadInst *LD : Loads) {
1901  Value *Ptr = LD->getPointerOperand();
1902  // If we did *not* see this pointer before, insert it to the
1903  // read list. If we *did* see it before, then it is already in
1904  // the read-write list. This allows us to vectorize expressions
1905  // such as A[i] += x, because the address of A[i] is a read-write
1906  // pointer. This only works if the index of A[i] is consecutive.
1907  // If the address of i is unknown (for example A[B[i]]) then we may
1908  // read a few words, modify, and write a few words, and some of the
1909  // words may be written to the same address.
1910  bool IsReadOnlyPtr = false;
1911  if (Seen.insert(Ptr).second ||
1912  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1913  ++NumReads;
1914  IsReadOnlyPtr = true;
1915  }
1916 
1917  // See if there is an unsafe dependency between a load from a uniform
1918  // address and a store to the same uniform address.
1919  if (UniformStores.count(Ptr)) {
1920  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
1921  "load and uniform store to the same address!\n");
1922  HasDependenceInvolvingLoopInvariantAddress = true;
1923  }
1924 
1926  // The TBAA metadata could have a control dependency on the predication
1927  // condition, so we cannot rely on it when determining whether or not we
1928  // need runtime pointer checks.
1929  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1930  Loc.AATags.TBAA = nullptr;
1931 
1932  Accesses.addLoad(Loc, IsReadOnlyPtr);
1933  }
1934 
1935  // If we write (or read-write) to a single destination and there are no
1936  // other reads in this loop then it is safe to vectorize.
1937  if (NumReadWrites == 1 && NumReads == 0) {
1938  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1939  CanVecMem = true;
1940  return;
1941  }
1942 
1943  // Build dependence sets and check whether we need a runtime pointer bounds
1944  // check.
1945  Accesses.buildDependenceSets();
1946 
1947  // Find pointers with computable bounds. We are going to use this information
1948  // to place a runtime bound check.
1949  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1950  TheLoop, SymbolicStrides);
1951  if (!CanDoRTIfNeeded) {
1952  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1953  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1954  << "the array bounds.\n");
1955  CanVecMem = false;
1956  return;
1957  }
1958 
1959  LLVM_DEBUG(
1960  dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1961 
1962  CanVecMem = true;
1963  if (Accesses.isDependencyCheckNeeded()) {
1964  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1965  CanVecMem = DepChecker->areDepsSafe(
1966  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1967  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1968 
1969  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1970  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1971 
1972  // Clear the dependency checks. We assume they are not needed.
1973  Accesses.resetDepChecks(*DepChecker);
1974 
1975  PtrRtChecking->reset();
1976  PtrRtChecking->Need = true;
1977 
1978  auto *SE = PSE->getSE();
1979  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1980  SymbolicStrides, true);
1981 
1982  // Check that we found the bounds for the pointer.
1983  if (!CanDoRTIfNeeded) {
1984  recordAnalysis("CantCheckMemDepsAtRunTime")
1985  << "cannot check memory dependencies at runtime";
1986  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1987  CanVecMem = false;
1988  return;
1989  }
1990 
1991  CanVecMem = true;
1992  }
1993  }
1994 
1995  if (CanVecMem)
1996  LLVM_DEBUG(
1997  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
1998  << (PtrRtChecking->Need ? "" : " don't")
1999  << " need runtime memory checks.\n");
2000  else {
2001  recordAnalysis("UnsafeMemDep")
2002  << "unsafe dependent memory operations in loop. Use "
2003  "#pragma loop distribute(enable) to allow loop distribution "
2004  "to attempt to isolate the offending operations into a separate "
2005  "loop";
2006  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2007  }
2008 }
2009 
2010 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2011  DominatorTree *DT) {
2012  assert(TheLoop->contains(BB) && "Unknown block used");
2013 
2014  // Blocks that do not dominate the latch need predication.
2015  BasicBlock* Latch = TheLoop->getLoopLatch();
2016  return !DT->dominates(BB, Latch);
2017 }
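// Illustration (hypothetical loop): in
//   for (i = 0; i < n; ++i) { if (C[i]) A[i] = 0; }
// the block with the conditional store does not dominate the latch, so by
// the rule above it must be predicated before the loop can be vectorized.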
2018 
2019 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2020  Instruction *I) {
2021  assert(!Report && "Multiple reports generated");
2022 
2023  Value *CodeRegion = TheLoop->getHeader();
2024  DebugLoc DL = TheLoop->getStartLoc();
2025 
2026  if (I) {
2027  CodeRegion = I->getParent();
2028  // If there is no debug location attached to the instruction, fall back to
2029  // using the loop's.
2030  if (I->getDebugLoc())
2031  DL = I->getDebugLoc();
2032  }
2033 
2034  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2035  CodeRegion);
2036  return *Report;
2037 }
2038 
2039 bool LoopAccessInfo::isUniform(Value *V) const {
2040  auto *SE = PSE->getSE();
2041  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2042  // never considered uniform.
2043  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2044  // trivially loop-invariant FP values to be considered uniform.
2045  if (!SE->isSCEVable(V->getType()))
2046  return false;
2047  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2048 }
2049 
2050 // FIXME: this function is currently a duplicate of the one in
2051 // LoopVectorize.cpp.
2052 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2053  Instruction *Loc) {
2054  if (FirstInst)
2055  return FirstInst;
2056  if (Instruction *I = dyn_cast<Instruction>(V))
2057  return I->getParent() == Loc->getParent() ? I : nullptr;
2058  return nullptr;
2059 }
2060 
2061 namespace {
2062 
2063 /// IR Values for the lower and upper bounds of a pointer evolution. We
2064 /// need to use value-handles because SCEV expansion can invalidate previously
2065 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
2066 /// a previous one.
2067 struct PointerBounds {
2068  TrackingVH<Value> Start;
2069  TrackingVH<Value> End;
2070 };
2071 
2072 } // end anonymous namespace
2073 
2074 /// Expand code for the lower and upper bound of the pointer group \p CG
2075 /// in \p TheLoop. \return the values for the bounds.
2076 static PointerBounds
2077 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2078  Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2079  const RuntimePointerChecking &PtrRtChecking) {
2080  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2081  const SCEV *Sc = SE->getSCEV(Ptr);
2082 
2083  unsigned AS = Ptr->getType()->getPointerAddressSpace();
2084  LLVMContext &Ctx = Loc->getContext();
2085 
2086  // Use this type for pointer arithmetic.
2087  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2088 
2089  if (SE->isLoopInvariant(Sc, TheLoop)) {
2090  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
2091  << *Ptr << "\n");
2092  // Ptr could be in the loop body. If so, expand a new one at the correct
2093  // location.
2094  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2095  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2096  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2097  : Ptr;
2098  // We must return a half-open range, which means incrementing Sc.
2099  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2100  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2101  return {NewPtr, NewPtrPlusOne};
2102  } else {
2103  Value *Start = nullptr, *End = nullptr;
2104  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2105  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2106  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2107  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
2108  << "\n");
2109  return {Start, End};
2110  }
2111 }
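// Illustration (hypothetical group): for a pointer walking A[0..n-1] with
// i32 elements, the expanded half-open range is roughly Start = &A[0] and
// End = (i8*)&A[0] + 4 * n; in the loop-invariant case above the range
// degenerates to [Ptr, Ptr + 1).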
2112 
2113 /// Turns a collection of checks into a collection of expanded upper and
2114 /// lower bounds for both pointers in the check.
2115 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2116  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2117  Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2118  const RuntimePointerChecking &PtrRtChecking) {
2119  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2120 
2121  // Here we're relying on the SCEV Expander's cache to only emit code for the
2122  // same bounds once.
2123  transform(
2124  PointerChecks, std::back_inserter(ChecksWithBounds),
2125  [&](const RuntimePointerChecking::PointerCheck &Check) {
2126  PointerBounds
2127  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2128  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2129  return std::make_pair(First, Second);
2130  });
2131 
2132  return ChecksWithBounds;
2133 }
2134 
2135 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2136  Instruction *Loc,
2137  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2138  const {
2139  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2140  auto *SE = PSE->getSE();
2141  SCEVExpander Exp(*SE, DL, "induction");
2142  auto ExpandedChecks =
2143  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2144 
2145  LLVMContext &Ctx = Loc->getContext();
2146  Instruction *FirstInst = nullptr;
2147  IRBuilder<> ChkBuilder(Loc);
2148  // Our instructions might fold to a constant.
2149  Value *MemoryRuntimeCheck = nullptr;
2150 
2151  for (const auto &Check : ExpandedChecks) {
2152  const PointerBounds &A = Check.first, &B = Check.second;
2153  // Check if two pointers (A and B) conflict where conflict is computed as:
2154  // start(A) <= end(B) && start(B) <= end(A)
2155  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2156  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2157 
2158  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2159  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2160  "Trying to bounds check pointers with different address spaces");
2161 
2162  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2163  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2164 
2165  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2166  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2167  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2168  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2169 
2170  // [A|B].Start points to the first accessed byte under base [A|B].
2171  // [A|B].End points to the last accessed byte, plus one.
2172  // There is no conflict when the intervals are disjoint:
2173  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2174  //
2175  // bound0 = (B.Start < A.End)
2176  // bound1 = (A.Start < B.End)
2177  // IsConflict = bound0 & bound1
2178  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2179  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2180  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2181  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2182  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2183  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2184  if (MemoryRuntimeCheck) {
2185  IsConflict =
2186  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2187  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2188  }
2189  MemoryRuntimeCheck = IsConflict;
2190  }
2191 
2192  if (!MemoryRuntimeCheck)
2193  return std::make_pair(nullptr, nullptr);
2194 
2195  // We have to do this trickery because the IRBuilder might fold the check to a
2196  // constant expression, in which case there is no Instruction anchored in
2197  // the block.
2198  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2199  ConstantInt::getTrue(Ctx));
2200  ChkBuilder.Insert(Check, "memcheck.conflict");
2201  FirstInst = getFirstInst(FirstInst, Check, Loc);
2202  return std::make_pair(FirstInst, Check);
2203 }
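// For one pair, the code built above reduces to this shape (illustrative
// IR, names taken from the builder calls):
//   %bound0         = icmp ult i8* %A.start, %B.end
//   %bound1         = icmp ult i8* %B.start, %A.end
//   %found.conflict = and i1 %bound0, %bound1
// Successive pairs are or'ed into %conflict.rdx, and the final and with
// 'true' anchors the result as an instruction in the block.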
2204 
2205 std::pair<Instruction *, Instruction *>
2206 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2207  if (!PtrRtChecking->Need)
2208  return std::make_pair(nullptr, nullptr);
2209 
2210  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2211 }
2212 
2213 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2214  Value *Ptr = nullptr;
2215  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2216  Ptr = LI->getPointerOperand();
2217  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2218  Ptr = SI->getPointerOperand();
2219  else
2220  return;
2221 
2222  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2223  if (!Stride)
2224  return;
2225 
2226  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2227  "versioning:");
2228  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2229 
2230  // Avoid adding the "Stride == 1" predicate when we know that
2231  // Stride >= Trip-Count. Under such a predicate the versioned loop would
2232  // execute at most a single iteration, since Trip-Count <= Stride == 1.
2233  //
2234  // TODO: We are currently not making a very informed decision on when it is
2235  // beneficial to apply stride versioning. It might make more sense that the
2236  // users of this analysis (such as the vectorizer) will trigger it, based on
2237  // their specific cost considerations; For example, in cases where stride
2238  // versioning does not help resolving memory accesses/dependences, the
2239  // vectorizer should evaluate the cost of the runtime test, and the benefit
2240  // of various possible stride specializations, considering the alternatives
2241  // of using gather/scatters (if available).
2242 
2243  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2244  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2245 
2246  // Match the types so we can compare the stride and the BETakenCount.
2247  // The Stride can be positive/negative, so we sign extend Stride;
2248  // The backedge-taken count is non-negative, so we zero extend BETakenCount.
2249  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2250  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2251  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2252  const SCEV *CastedStride = StrideExpr;
2253  const SCEV *CastedBECount = BETakenCount;
2254  ScalarEvolution *SE = PSE->getSE();
2255  if (BETypeSize >= StrideTypeSize)
2256  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2257  else
2258  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2259  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2260  // Since TripCount == BackEdgeTakenCount + 1, checking:
2261  // "Stride >= TripCount" is equivalent to checking:
2262  // Stride - BETakenCount > 0
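  // Numeric illustration (hypothetical values): with a trip count of 4
  // (BETakenCount = 3) and a symbolic stride of 5, Stride - BETakenCount = 2
  // is known positive, so under a "Stride == 1" predicate the loop would run
  // at most once and versioning buys nothing.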
2263  if (SE->isKnownPositive(StrideMinusBETaken)) {
2264  LLVM_DEBUG(
2265  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2266  "Stride==1 predicate will imply that the loop executes "
2267  "at most once.\n");
2268  return;
2269  }
2270  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2271 
2272  SymbolicStrides[Ptr] = Stride;
2273  StrideSet.insert(Stride);
2274 }
2275 
2276 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2277  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2278  DominatorTree *DT, LoopInfo *LI)
2279  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2280  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2281  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2282  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2283  HasDependenceInvolvingLoopInvariantAddress(false) {
2284  if (canAnalyzeLoop())
2285  analyzeLoop(AA, LI, TLI, DT);
2286 }
2287 
2288 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2289  if (CanVecMem) {
2290  OS.indent(Depth) << "Memory dependences are safe";
2291  if (MaxSafeDepDistBytes != -1ULL)
2292  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2293  << " bytes";
2294  if (PtrRtChecking->Need)
2295  OS << " with run-time checks";
2296  OS << "\n";
2297  }
2298 
2299  if (Report)
2300  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2301 
2302  if (auto *Dependences = DepChecker->getDependences()) {
2303  OS.indent(Depth) << "Dependences:\n";
2304  for (auto &Dep : *Dependences) {
2305  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2306  OS << "\n";
2307  }
2308  } else
2309  OS.indent(Depth) << "Too many dependences, not recorded\n";
2310 
2311  // List the pairs of accesses that need run-time checks to prove independence.
2312  PtrRtChecking->print(OS, Depth);
2313  OS << "\n";
2314 
2315  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2316  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2317  << "found in loop.\n";
2318 
2319  OS.indent(Depth) << "SCEV assumptions:\n";
2320  PSE->getUnionPredicate().print(OS, Depth);
2321 
2322  OS << "\n";
2323 
2324  OS.indent(Depth) << "Expressions re-written:\n";
2325  PSE->print(OS, Depth);
2326 }
2327 
2328 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2329  auto &LAI = LoopAccessInfoMap[L];
2330 
2331  if (!LAI)
2332  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2333 
2334  return *LAI.get();
2335 }
2336 
2337 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2338  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2339 
2340  for (Loop *TopLevelLoop : *LI)
2341  for (Loop *L : depth_first(TopLevelLoop)) {
2342  OS.indent(2) << L->getHeader()->getName() << ":\n";
2343  auto &LAI = LAA.getInfo(L);
2344  LAI.print(OS, 4);
2345  }
2346 }
2347 
2348 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2349  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2350  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2351  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2352  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2353  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2354  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2355 
2356  return false;
2357 }
2358 
2359 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2360  AU.addRequired<ScalarEvolutionWrapperPass>();
2361  AU.addRequired<AAResultsWrapperPass>();
2362  AU.addRequired<DominatorTreeWrapperPass>();
2363  AU.addRequired<LoopInfoWrapperPass>();
2364 
2365  AU.setPreservesAll();
2366 }
2367 
2368 char LoopAccessLegacyAnalysis::ID = 0;
2369 static const char laa_name[] = "Loop Access Analysis";
2370 #define LAA_NAME "loop-accesses"
2371 
2372 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2373 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2374 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2375 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2376 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2377 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2378 
2379 AnalysisKey LoopAccessAnalysis::Key;
2380 
2381 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2382  LoopStandardAnalysisResults &AR) {
2383  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2384 }
2385 
2386 namespace llvm {
2387 
2388  Pass *createLAAPass() {
2389  return new LoopAccessLegacyAnalysis();
2390  }
2391 
2392 } // end namespace llvm