LoopAccessAnalysis.cpp
1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The implementation of the loop memory dependence analysis that was originally
10 // developed for the loop vectorizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/Analysis/LoopInfo.h"
38 #include "llvm/IR/BasicBlock.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DataLayout.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/DerivedTypes.h"
43 #include "llvm/IR/DiagnosticInfo.h"
44 #include "llvm/IR/Dominators.h"
45 #include "llvm/IR/Function.h"
46 #include "llvm/IR/IRBuilder.h"
47 #include "llvm/IR/InstrTypes.h"
48 #include "llvm/IR/Instruction.h"
49 #include "llvm/IR/Instructions.h"
50 #include "llvm/IR/Operator.h"
51 #include "llvm/IR/PassManager.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/Value.h"
54 #include "llvm/IR/ValueHandle.h"
55 #include "llvm/Pass.h"
56 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/Debug.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <cstdlib>
65 #include <iterator>
66 #include <utility>
67 #include <vector>
68 
69 using namespace llvm;
70 
71 #define DEBUG_TYPE "loop-accesses"
72 
73 static cl::opt<unsigned, true>
74 VectorizationFactor("force-vector-width", cl::Hidden,
75                     cl::desc("Sets the SIMD width. Zero is autoselect."),
76                     cl::location(VectorizerParams::VectorizationFactor));
77 unsigned VectorizerParams::VectorizationFactor;
78 
79 static cl::opt<unsigned, true>
80 VectorizationInterleave("force-vector-interleave", cl::Hidden,
81                         cl::desc("Sets the vectorization interleave count. "
82                                  "Zero is autoselect."),
83                         cl::location(
84                             VectorizerParams::VectorizationInterleave));
85 unsigned VectorizerParams::VectorizationInterleave;
86 
87 static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
88     "runtime-memory-check-threshold", cl::Hidden,
89     cl::desc("When performing memory disambiguation checks at runtime do not "
90              "generate more than this number of comparisons (default = 8)."),
91     cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
92 unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
93 
94 /// The maximum iterations used to merge memory checks
95 static cl::opt<unsigned> MemoryCheckMergeThreshold(
96     "memory-check-merge-threshold", cl::Hidden,
97  cl::desc("Maximum number of comparisons done when trying to merge "
98  "runtime memory checks. (default = 100)"),
99  cl::init(100));
100 
101 /// Maximum SIMD width.
102 const unsigned VectorizerParams::MaxVectorWidth = 64;
103 
104 /// We collect dependences up to this threshold.
105 static cl::opt<unsigned>
106  MaxDependences("max-dependences", cl::Hidden,
107  cl::desc("Maximum number of dependences collected by "
108  "loop-access analysis (default = 100)"),
109  cl::init(100));
110 
111 /// This enables versioning on the strides of symbolically striding memory
112 /// accesses in code like the following.
113 /// for (i = 0; i < N; ++i)
114 /// A[i * Stride1] += B[i * Stride2] ...
115 ///
116 /// Will be roughly translated to
117 /// if (Stride1 == 1 && Stride2 == 1) {
118 /// for (i = 0; i < N; i+=4)
119 /// A[i:i+3] += ...
120 /// } else
121 /// ...
122 static cl::opt<bool> EnableMemAccessVersioning(
123     "enable-mem-access-versioning", cl::init(true), cl::Hidden,
124  cl::desc("Enable symbolic stride memory access versioning"));
125 
126 /// Enable store-to-load forwarding conflict detection. This option can
127 /// be disabled for correctness testing.
128 static cl::opt<bool> EnableForwardingConflictDetection(
129     "store-to-load-forwarding-conflict-detection", cl::Hidden,
130  cl::desc("Enable conflict detection in loop-access analysis"),
131  cl::init(true));
132 
133 bool VectorizerParams::isInterleaveForced() {
134   return ::VectorizationInterleave.getNumOccurrences() > 0;
135 }
136 
137 Value *llvm::stripIntegerCast(Value *V) {
138   if (auto *CI = dyn_cast<CastInst>(V))
139  if (CI->getOperand(0)->getType()->isIntegerTy())
140  return CI->getOperand(0);
141  return V;
142 }
143 
144 const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
145                                             const ValueToValueMap &PtrToStride,
146  Value *Ptr, Value *OrigPtr) {
147  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
148 
149  // If there is an entry in the map return the SCEV of the pointer with the
150  // symbolic stride replaced by one.
151   ValueToValueMap::const_iterator SI =
152       PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
153  if (SI != PtrToStride.end()) {
154  Value *StrideVal = SI->second;
155 
156  // Strip casts.
157  StrideVal = stripIntegerCast(StrideVal);
158 
159  ScalarEvolution *SE = PSE.getSE();
160  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
161  const auto *CT =
162  static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
163 
164  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
165  auto *Expr = PSE.getSCEV(Ptr);
166 
167  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
168  << " by: " << *Expr << "\n");
169  return Expr;
170  }
171 
172  // Otherwise, just return the SCEV of the original pointer.
173  return OrigSCEV;
174 }
175 
176 /// Calculate Start and End points of memory access.
177 /// Let's assume A is the first access and B is a memory access on the N-th loop
178 /// iteration. Then B is calculated as:
179 /// B = A + Step*N.
180 /// Step value may be positive or negative.
181 /// N is the calculated back-edge taken count:
182 /// N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
183 /// Start and End points are calculated in the following way:
184 /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
185 /// where SizeOfElt is the size of single memory access in bytes.
186 ///
187 /// There is no conflict when the intervals are disjoint:
188 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
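/// A purely illustrative example (values assumed here, not taken from the
/// code): with Step = 4 bytes, SizeOfElt = 4 and N = 99, B = A + 4*99 =
/// A + 396, so Start = A and End = A + 396 + 4 = A + 400.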
189 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
190  unsigned DepSetId, unsigned ASId,
191  const ValueToValueMap &Strides,
192                                     PredicatedScalarEvolution &PSE) {
193   // Get the stride replaced scev.
194  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
195  ScalarEvolution *SE = PSE.getSE();
196 
197  const SCEV *ScStart;
198  const SCEV *ScEnd;
199 
200  if (SE->isLoopInvariant(Sc, Lp))
201  ScStart = ScEnd = Sc;
202  else {
203  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
204  assert(AR && "Invalid addrec expression");
205  const SCEV *Ex = PSE.getBackedgeTakenCount();
206 
207  ScStart = AR->getStart();
208  ScEnd = AR->evaluateAtIteration(Ex, *SE);
209  const SCEV *Step = AR->getStepRecurrence(*SE);
210 
211  // For expressions with negative step, the upper bound is ScStart and the
212  // lower bound is ScEnd.
213  if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
214  if (CStep->getValue()->isNegative())
215  std::swap(ScStart, ScEnd);
216  } else {
217  // Fallback case: the step is not constant, but we can still
218  // get the upper and lower bounds of the interval by using min/max
219  // expressions.
220  ScStart = SE->getUMinExpr(ScStart, ScEnd);
221  ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
222  }
223  // Add the size of the pointed element to ScEnd.
224   unsigned EltSize =
225     Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
226   const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
227  ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
228  }
229 
230  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
231 }
232 
233 SmallVector<RuntimePointerChecking::PointerCheck, 4>
234 RuntimePointerChecking::generateChecks() const {
235   SmallVector<PointerCheck, 4> Checks;
236 
237  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
238  for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
239  const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
240  const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];
241 
242  if (needsChecking(CGI, CGJ))
243  Checks.push_back(std::make_pair(&CGI, &CGJ));
244  }
245  }
246  return Checks;
247 }
248 
249 void RuntimePointerChecking::generateChecks(
250  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
251  assert(Checks.empty() && "Checks is not empty");
252  groupChecks(DepCands, UseDependencies);
253  Checks = generateChecks();
254 }
255 
256 bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
257                                            const CheckingPtrGroup &N) const {
258  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
259  for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
260  if (needsChecking(M.Members[I], N.Members[J]))
261  return true;
262  return false;
263 }
264 
265 /// Compare \p I and \p J and return the minimum.
266 /// Return nullptr in case we couldn't find an answer.
267 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
268  ScalarEvolution *SE) {
269  const SCEV *Diff = SE->getMinusSCEV(J, I);
270  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
271 
272  if (!C)
273  return nullptr;
274  if (C->getValue()->isNegative())
275  return J;
276  return I;
277 }
278 
279 bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
280   const SCEV *Start = RtCheck.Pointers[Index].Start;
281  const SCEV *End = RtCheck.Pointers[Index].End;
282 
283  // Compare the starts and ends with the known minimum and maximum
284  // of this set. We need to know how we compare against the min/max
285  // of the set in order to be able to emit memchecks.
286  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
287  if (!Min0)
288  return false;
289 
290  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
291  if (!Min1)
292  return false;
293 
294  // Update the low bound expression if we've found a new min value.
295  if (Min0 == Start)
296  Low = Start;
297 
298  // Update the high bound expression if we've found a new max value.
299  if (Min1 != End)
300  High = End;
301 
302  Members.push_back(Index);
303  return true;
304 }
305 
306 void RuntimePointerChecking::groupChecks(
307  MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
308  // We build the groups from dependency candidates equivalence classes
309  // because:
310  // - We know that pointers in the same equivalence class share
311  // the same underlying object and therefore there is a chance
312  // that we can compare pointers
313  // - We wouldn't be able to merge two pointers for which we need
314  // to emit a memcheck. The classes in DepCands are already
315  // conveniently built such that no two pointers in the same
316  // class need checking against each other.
317 
318  // We use the following (greedy) algorithm to construct the groups
319  // For every pointer in the equivalence class:
320  // For each existing group:
321  // - if the difference between this pointer and the min/max bounds
322  // of the group is a constant, then make the pointer part of the
323  // group and update the min/max bounds of that group as required.
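  // As an illustrative sketch (pointer names assumed, not from this file):
  // accesses at A, A+4 and A+100, whose pairwise differences are SCEV
  // constants, can be merged into one group with Low = A and
  // High = A+100+EltSize, whereas an access at A+n for a symbolic n would
  // start a group of its own.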
324 
325  CheckingGroups.clear();
326 
327  // If we need to check two pointers to the same underlying object
328  // with a non-constant difference, we shouldn't perform any pointer
329  // grouping with those pointers. This is because we can easily get
330  // into cases where the resulting check would return false, even when
331  // the accesses are safe.
332  //
333  // The following example shows this:
334  // for (i = 0; i < 1000; ++i)
335  // a[5000 + i * m] = a[i] + a[i + 9000]
336  //
337  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
338  // (0, 10000) which is always false. However, if m is 1, there is no
339  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
340  // us to perform an accurate check in this case.
341  //
342  // The above case requires that we have an UnknownDependence between
343  // accesses to the same underlying object. This cannot happen unless
344  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
345  // is also false. In this case we will use the fallback path and create
346  // separate checking groups for all pointers.
347 
348  // If we don't have the dependency partitions, construct a new
349  // checking pointer group for each pointer. This is also required
350  // for correctness, because in this case we can have checking between
351  // pointers to the same underlying object.
352  if (!UseDependencies) {
353  for (unsigned I = 0; I < Pointers.size(); ++I)
354  CheckingGroups.push_back(CheckingPtrGroup(I, *this));
355  return;
356  }
357 
358  unsigned TotalComparisons = 0;
359 
360  DenseMap<Value *, unsigned> PositionMap;
361  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
362  PositionMap[Pointers[Index].PointerValue] = Index;
363 
364  // We need to keep track of what pointers we've already seen so we
365  // don't process them twice.
366   SmallSet<unsigned, 2> Seen;
367 
368  // Go through all equivalence classes, get the "pointer check groups"
369  // and add them to the overall solution. We use the order in which accesses
370  // appear in 'Pointers' to enforce determinism.
371  for (unsigned I = 0; I < Pointers.size(); ++I) {
372  // We've seen this pointer before, and therefore already processed
373  // its equivalence class.
374  if (Seen.count(I))
375  continue;
376 
377  MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
378  Pointers[I].IsWritePtr);
379 
379 
380     SmallVector<CheckingPtrGroup, 2> Groups;
381     auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
382 
383  // Because DepCands is constructed by visiting accesses in the order in
384  // which they appear in alias sets (which is deterministic) and the
385  // iteration order within an equivalence class member is only dependent on
386  // the order in which unions and insertions are performed on the
387  // equivalence class, the iteration order is deterministic.
388  for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
389  MI != ME; ++MI) {
390  unsigned Pointer = PositionMap[MI->getPointer()];
391  bool Merged = false;
392  // Mark this pointer as seen.
393  Seen.insert(Pointer);
394 
395  // Go through all the existing sets and see if we can find one
396  // which can include this pointer.
397  for (CheckingPtrGroup &Group : Groups) {
398  // Don't perform more than a certain amount of comparisons.
399  // This should limit the cost of grouping the pointers to something
400  // reasonable. If we do end up hitting this threshold, the algorithm
401  // will create separate groups for all remaining pointers.
402  if (TotalComparisons > MemoryCheckMergeThreshold)
403  break;
404 
405  TotalComparisons++;
406 
407  if (Group.addPointer(Pointer)) {
408  Merged = true;
409  break;
410  }
411  }
412 
413  if (!Merged)
414  // We couldn't add this pointer to any existing set or the threshold
415  // for the number of comparisons has been reached. Create a new group
416  // to hold the current pointer.
417  Groups.push_back(CheckingPtrGroup(Pointer, *this));
418  }
419 
420  // We've computed the grouped checks for this partition.
421  // Save the results and continue with the next one.
422  llvm::copy(Groups, std::back_inserter(CheckingGroups));
423  }
424 }
425 
426 bool RuntimePointerChecking::arePointersInSamePartition(
427     const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
428  unsigned PtrIdx2) {
429  return (PtrToPartition[PtrIdx1] != -1 &&
430  PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
431 }
432 
433 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
434  const PointerInfo &PointerI = Pointers[I];
435  const PointerInfo &PointerJ = Pointers[J];
436 
437  // No need to check if two readonly pointers intersect.
438  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
439  return false;
440 
441  // Only need to check pointers between two different dependency sets.
442  if (PointerI.DependencySetId == PointerJ.DependencySetId)
443  return false;
444 
445  // Only need to check pointers in the same alias set.
446  if (PointerI.AliasSetId != PointerJ.AliasSetId)
447  return false;
448 
449  return true;
450 }
451 
452 void RuntimePointerChecking::printChecks(
453     raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
454  unsigned Depth) const {
455  unsigned N = 0;
456  for (const auto &Check : Checks) {
457  const auto &First = Check.first->Members, &Second = Check.second->Members;
458 
459  OS.indent(Depth) << "Check " << N++ << ":\n";
460 
461  OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
462  for (unsigned K = 0; K < First.size(); ++K)
463  OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";
464 
465  OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
466  for (unsigned K = 0; K < Second.size(); ++K)
467  OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
468  }
469 }
470 
471 void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
472 
473  OS.indent(Depth) << "Run-time memory checks:\n";
474  printChecks(OS, Checks, Depth);
475 
476  OS.indent(Depth) << "Grouped accesses:\n";
477  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
478  const auto &CG = CheckingGroups[I];
479 
480  OS.indent(Depth + 2) << "Group " << &CG << ":\n";
481  OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
482  << ")\n";
483  for (unsigned J = 0; J < CG.Members.size(); ++J) {
484  OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
485  << "\n";
486  }
487  }
488 }
489 
490 namespace {
491 
492 /// Analyses memory accesses in a loop.
493 ///
494 /// Checks whether run time pointer checks are needed and builds sets for data
495 /// dependence checking.
496 class AccessAnalysis {
497 public:
498  /// Read or write access location.
499  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
500  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
501 
502  AccessAnalysis(const DataLayout &Dl, Loop *TheLoop, AliasAnalysis *AA,
503                  LoopInfo *LI, MemoryDepChecker::DepCandidates &DA,
504                  PredicatedScalarEvolution &PSE)
505       : DL(Dl), TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
506  IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
507 
508  /// Register a load and whether it is only read from.
509  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
510  Value *Ptr = const_cast<Value*>(Loc.Ptr);
511  AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
512  Accesses.insert(MemAccessInfo(Ptr, false));
513  if (IsReadOnly)
514  ReadOnlyPtr.insert(Ptr);
515  }
516 
517  /// Register a store.
518  void addStore(MemoryLocation &Loc) {
519  Value *Ptr = const_cast<Value*>(Loc.Ptr);
520  AST.add(Ptr, LocationSize::unknown(), Loc.AATags);
521  Accesses.insert(MemAccessInfo(Ptr, true));
522  }
523 
524  /// Check if we can emit a run-time no-alias check for \p Access.
525  ///
526  /// Returns true if we can emit a run-time no alias check for \p Access.
527  /// If we can check this access, this also adds it to a dependence set and
528  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
529  /// we will attempt to use additional run-time checks in order to get
530  /// the bounds of the pointer.
531  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
532  MemAccessInfo Access,
533  const ValueToValueMap &Strides,
534  DenseMap<Value *, unsigned> &DepSetId,
535  Loop *TheLoop, unsigned &RunningDepId,
536  unsigned ASId, bool ShouldCheckStride,
537  bool Assume);
538 
539  /// Check whether we can check the pointers at runtime for
540  /// non-intersection.
541  ///
542  /// Returns true if we need no check or if we do and we can generate them
543  /// (i.e. the pointers have computable bounds).
544  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
545  Loop *TheLoop, const ValueToValueMap &Strides,
546  bool ShouldCheckWrap = false);
547 
548  /// Goes over all memory accesses, checks whether a RT check is needed
549  /// and builds sets of dependent accesses.
550  void buildDependenceSets() {
551  processMemAccesses();
552  }
553 
554  /// Initial processing of memory accesses determined that we need to
555  /// perform dependency checking.
556  ///
557  /// Note that this can later be cleared if we retry memcheck analysis without
558  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
559  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }
560 
561  /// We decided that no dependence analysis would be used. Reset the state.
562  void resetDepChecks(MemoryDepChecker &DepChecker) {
563  CheckDeps.clear();
564  DepChecker.clearDependences();
565  }
566 
567  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }
568 
569 private:
570  typedef SetVector<MemAccessInfo> PtrAccessSet;
571 
572  /// Go over all memory access and check whether runtime pointer checks
573  /// are needed and build sets of dependency check candidates.
574  void processMemAccesses();
575 
576  /// Set of all accesses.
577  PtrAccessSet Accesses;
578 
579  const DataLayout &DL;
580 
581  /// The loop being checked.
582  const Loop *TheLoop;
583 
584  /// List of accesses that need a further dependence check.
585  MemAccessInfoList CheckDeps;
586 
587  /// Set of pointers that are read only.
588  SmallPtrSet<Value*, 16> ReadOnlyPtr;
589 
590  /// An alias set tracker to partition the access set by underlying object and
591  /// intrinsic property (such as TBAA metadata).
592  AliasSetTracker AST;
593 
594  LoopInfo *LI;
595 
596  /// Sets of potentially dependent accesses - members of one set share an
597  /// underlying pointer. The set "CheckDeps" identifies which sets really need a
598  /// dependence check.
599  MemoryDepChecker::DepCandidates &DepCands;
600 
601  /// Initial processing of memory accesses determined that we may need
602  /// to add memchecks. Perform the analysis to determine the necessary checks.
603  ///
604  /// Note that, this is different from isDependencyCheckNeeded. When we retry
605  /// memcheck analysis without dependency checking
606  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
607  /// cleared while this remains set if we have potentially dependent accesses.
608  bool IsRTCheckAnalysisNeeded;
609 
610  /// The SCEV predicate containing all the SCEV-related assumptions.
611  PredicatedScalarEvolution &PSE;
612 };
613 
614 } // end anonymous namespace
615 
616 /// Check whether a pointer can participate in a runtime bounds check.
617 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
618 /// by adding run-time checks (overflow checks) if necessary.
619 static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
620                                 const ValueToValueMap &Strides, Value *Ptr,
621  Loop *L, bool Assume) {
622  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
623 
624  // The bounds for a loop-invariant pointer are trivial.
625  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
626  return true;
627 
628  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
629 
630  if (!AR && Assume)
631  AR = PSE.getAsAddRec(Ptr);
632 
633  if (!AR)
634  return false;
635 
636  return AR->isAffine();
637 }
638 
639 /// Check whether a pointer address cannot wrap.
640 static bool isNoWrap(PredicatedScalarEvolution &PSE,
641                      const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
642  const SCEV *PtrScev = PSE.getSCEV(Ptr);
643  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
644  return true;
645 
646  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
647  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
648  return true;
649 
650  return false;
651 }
652 
653 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
654  MemAccessInfo Access,
655  const ValueToValueMap &StridesMap,
656  DenseMap<Value *, unsigned> &DepSetId,
657  Loop *TheLoop, unsigned &RunningDepId,
658  unsigned ASId, bool ShouldCheckWrap,
659  bool Assume) {
660  Value *Ptr = Access.getPointer();
661 
662  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
663  return false;
664 
665  // When we run after a failing dependency check we have to make sure
666  // we don't have wrapping pointers.
667  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
668  auto *Expr = PSE.getSCEV(Ptr);
669  if (!Assume || !isa<SCEVAddRecExpr>(Expr))
670  return false;
671  PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
672  }
673 
674  // The id of the dependence set.
675  unsigned DepId;
676 
677  if (isDependencyCheckNeeded()) {
678  Value *Leader = DepCands.getLeaderValue(Access).getPointer();
679  unsigned &LeaderId = DepSetId[Leader];
680  if (!LeaderId)
681  LeaderId = RunningDepId++;
682  DepId = LeaderId;
683  } else
684  // Each access has its own dependence set.
685  DepId = RunningDepId++;
686 
687  bool IsWrite = Access.getInt();
688  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
689  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
690 
691  return true;
692  }
693 
694 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
695  ScalarEvolution *SE, Loop *TheLoop,
696  const ValueToValueMap &StridesMap,
697  bool ShouldCheckWrap) {
698  // Find pointers with computable bounds. We are going to use this information
699  // to place a runtime bound check.
700  bool CanDoRT = true;
701 
702  bool NeedRTCheck = false;
703  if (!IsRTCheckAnalysisNeeded) return true;
704 
705  bool IsDepCheckNeeded = isDependencyCheckNeeded();
706 
707  // We assign a consecutive id to accesses from different alias sets.
708  // Accesses between different groups don't need to be checked.
709  unsigned ASId = 1;
710  for (auto &AS : AST) {
711  int NumReadPtrChecks = 0;
712  int NumWritePtrChecks = 0;
713  bool CanDoAliasSetRT = true;
714 
715  // We assign consecutive ids to accesses from different dependence sets.
716  // Accesses within the same set don't need a runtime check.
717  unsigned RunningDepId = 1;
718  DenseMap<Value *, unsigned> DepSetId;
719 
720  SmallVector<MemAccessInfo, 4> Retries;
721 
722  for (auto A : AS) {
723  Value *Ptr = A.getValue();
724  bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
725  MemAccessInfo Access(Ptr, IsWrite);
726 
727  if (IsWrite)
728  ++NumWritePtrChecks;
729  else
730  ++NumReadPtrChecks;
731 
732  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
733  RunningDepId, ASId, ShouldCheckWrap, false)) {
734  LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
735  Retries.push_back(Access);
736  CanDoAliasSetRT = false;
737  }
738  }
739 
740  // If we have at least two writes or one write and a read then we need to
741  // check them. But there is no need for checks if there is only one
742  // dependence set for this alias set.
743  //
744  // Note that this function computes CanDoRT and NeedRTCheck independently.
745  // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
746  // for which we couldn't find the bounds but we don't actually need to emit
747  // any checks so it does not matter.
748  bool NeedsAliasSetRTCheck = false;
749  if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2))
750  NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
751  (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
752 
753  // We need to perform run-time alias checks, but some pointers had bounds
754  // that couldn't be checked.
755  if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
756  // Reset the CanDoSetRt flag and retry all accesses that have failed.
757  // We know that we need these checks, so we can now be more aggressive
758  // and add further checks if required (overflow checks).
759  CanDoAliasSetRT = true;
760  for (auto Access : Retries)
761  if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
762  TheLoop, RunningDepId, ASId,
763  ShouldCheckWrap, /*Assume=*/true)) {
764  CanDoAliasSetRT = false;
765  break;
766  }
767  }
768 
769  CanDoRT &= CanDoAliasSetRT;
770  NeedRTCheck |= NeedsAliasSetRTCheck;
771  ++ASId;
772  }
773 
774  // If the pointers that we would use for the bounds comparison have different
775  // address spaces, assume the values aren't directly comparable, so we can't
776  // use them for the runtime check. We also have to assume they could
777  // overlap. In the future there should be metadata for whether address spaces
778  // are disjoint.
779  unsigned NumPointers = RtCheck.Pointers.size();
780  for (unsigned i = 0; i < NumPointers; ++i) {
781  for (unsigned j = i + 1; j < NumPointers; ++j) {
782  // Only need to check pointers between two different dependency sets.
783  if (RtCheck.Pointers[i].DependencySetId ==
784  RtCheck.Pointers[j].DependencySetId)
785  continue;
786  // Only need to check pointers in the same alias set.
787  if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
788  continue;
789 
790  Value *PtrI = RtCheck.Pointers[i].PointerValue;
791  Value *PtrJ = RtCheck.Pointers[j].PointerValue;
792 
793  unsigned ASi = PtrI->getType()->getPointerAddressSpace();
794  unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
795  if (ASi != ASj) {
796  LLVM_DEBUG(
797  dbgs() << "LAA: Runtime check would require comparison between"
798  " different address spaces\n");
799  return false;
800  }
801  }
802  }
803 
804  if (NeedRTCheck && CanDoRT)
805  RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
806 
807  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
808  << " pointer comparisons.\n");
809 
810  RtCheck.Need = NeedRTCheck;
811 
812  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
813  if (!CanDoRTIfNeeded)
814  RtCheck.reset();
815  return CanDoRTIfNeeded;
816 }
817 
818 void AccessAnalysis::processMemAccesses() {
819  // We process the set twice: first we process read-write pointers, last we
820  // process read-only pointers. This allows us to skip dependence tests for
821  // read-only pointers.
822 
823  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
824  LLVM_DEBUG(dbgs() << " AST: "; AST.dump());
825  LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n");
826  LLVM_DEBUG({
827  for (auto A : Accesses)
828  dbgs() << "\t" << *A.getPointer() << " (" <<
829  (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
830  "read-only" : "read")) << ")\n";
831  });
832 
833  // The AliasSetTracker has nicely partitioned our pointers by metadata
834  // compatibility and potential for underlying-object overlap. As a result, we
835  // only need to check for potential pointer dependencies within each alias
836  // set.
837  for (auto &AS : AST) {
838  // Note that both the alias-set tracker and the alias sets themselves use
839  // linked lists internally and so the iteration order here is deterministic
840  // (matching the original instruction order within each set).
841 
842  bool SetHasWrite = false;
843 
844  // Map of pointers to last access encountered.
845  typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
846  UnderlyingObjToAccessMap ObjToLastAccess;
847 
848  // Set of access to check after all writes have been processed.
849  PtrAccessSet DeferredAccesses;
850 
851  // Iterate over each alias set twice, once to process read/write pointers,
852  // and then to process read-only pointers.
853  for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
854  bool UseDeferred = SetIteration > 0;
855  PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;
856 
857  for (auto AV : AS) {
858  Value *Ptr = AV.getValue();
859 
860  // For a single memory access in AliasSetTracker, Accesses may contain
861  // both read and write, and they both need to be handled for CheckDeps.
862  for (auto AC : S) {
863  if (AC.getPointer() != Ptr)
864  continue;
865 
866  bool IsWrite = AC.getInt();
867 
868  // If we're using the deferred access set, then it contains only
869  // reads.
870  bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
871  if (UseDeferred && !IsReadOnlyPtr)
872  continue;
873  // Otherwise, the pointer must be in the PtrAccessSet, either as a
874  // read or a write.
875  assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
876  S.count(MemAccessInfo(Ptr, false))) &&
877  "Alias-set pointer not in the access set?");
878 
879  MemAccessInfo Access(Ptr, IsWrite);
880  DepCands.insert(Access);
881 
882  // Memorize read-only pointers for later processing and skip them in
883  // the first round (they need to be checked after we have seen all
884  // write pointers). Note: we also mark pointers that are not
885  // consecutive as "read-only" pointers (so that we check
886  // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
887  if (!UseDeferred && IsReadOnlyPtr) {
888  DeferredAccesses.insert(Access);
889  continue;
890  }
891 
892  // If this is a write - check other reads and writes for conflicts. If
893  // this is a read only check other writes for conflicts (but only if
894  // there is no other write to the ptr - this is an optimization to
895  // catch "a[i] = a[i] + " without having to do a dependence check).
896  if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
897  CheckDeps.push_back(Access);
898  IsRTCheckAnalysisNeeded = true;
899  }
900 
901  if (IsWrite)
902  SetHasWrite = true;
903 
904  // Create sets of pointers connected by a shared alias set and
905  // underlying object.
906  typedef SmallVector<const Value *, 16> ValueVector;
907  ValueVector TempObjects;
908 
909  GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
910  LLVM_DEBUG(dbgs()
911  << "Underlying objects for pointer " << *Ptr << "\n");
912  for (const Value *UnderlyingObj : TempObjects) {
913  // nullptr never aliases; don't join sets for pointers that have "null"
914  // in their UnderlyingObjects list.
915  if (isa<ConstantPointerNull>(UnderlyingObj) &&
916  !NullPointerIsDefined(
917      TheLoop->getHeader()->getParent(),
918  UnderlyingObj->getType()->getPointerAddressSpace()))
919  continue;
920 
921  UnderlyingObjToAccessMap::iterator Prev =
922  ObjToLastAccess.find(UnderlyingObj);
923  if (Prev != ObjToLastAccess.end())
924  DepCands.unionSets(Access, Prev->second);
925 
926  ObjToLastAccess[UnderlyingObj] = Access;
927  LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n");
928  }
929  }
930  }
931  }
932  }
933 }
934 
935 static bool isInBoundsGep(Value *Ptr) {
936  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
937  return GEP->isInBounds();
938  return false;
939 }
940 
941 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
942 /// i.e. monotonically increasing/decreasing.
943 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
944  PredicatedScalarEvolution &PSE, const Loop *L) {
945  // FIXME: This should probably only return true for NUW.
946  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
947    return true;
948 
949  // Scalar evolution does not propagate the non-wrapping flags to values that
950  // are derived from a non-wrapping induction variable because non-wrapping
951  // could be flow-sensitive.
952  //
953  // Look through the potentially overflowing instruction to try to prove
954  // non-wrapping for the *specific* value of Ptr.
955 
956  // The arithmetic implied by an inbounds GEP can't overflow.
957  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
958  if (!GEP || !GEP->isInBounds())
959  return false;
960 
961  // Make sure there is only one non-const index and analyze that.
962  Value *NonConstIndex = nullptr;
963  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
964  if (!isa<ConstantInt>(Index)) {
965  if (NonConstIndex)
966  return false;
967  NonConstIndex = Index;
968  }
969  if (!NonConstIndex)
970  // The recurrence is on the pointer, ignore for now.
971  return false;
972 
973  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
974  // AddRec using a NSW operation.
975  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
976  if (OBO->hasNoSignedWrap() &&
977  // Assume constant for other the operand so that the AddRec can be
978  // easily found.
979  isa<ConstantInt>(OBO->getOperand(1))) {
980  auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
981 
982  if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
983  return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
984  }
985 
986  return false;
987 }
988 
989 /// Check whether the access through \p Ptr has a constant stride.
990 int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
991                            const Loop *Lp, const ValueToValueMap &StridesMap,
992  bool Assume, bool ShouldCheckWrap) {
993  Type *Ty = Ptr->getType();
994  assert(Ty->isPointerTy() && "Unexpected non-ptr");
995 
996  // Make sure that the pointer does not point to aggregate types.
997  auto *PtrTy = cast<PointerType>(Ty);
998  if (PtrTy->getElementType()->isAggregateType()) {
999  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
1000  << *Ptr << "\n");
1001  return 0;
1002  }
1003 
1004  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1005 
1006  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1007  if (Assume && !AR)
1008  AR = PSE.getAsAddRec(Ptr);
1009 
1010  if (!AR) {
1011  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1012  << " SCEV: " << *PtrScev << "\n");
1013  return 0;
1014  }
1015 
1016  // The access function must stride over the innermost loop.
1017  if (Lp != AR->getLoop()) {
1018  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1019  << *Ptr << " SCEV: " << *AR << "\n");
1020  return 0;
1021  }
1022 
1023  // The address calculation must not wrap. Otherwise, a dependence could be
1024  // inverted.
1025  // An inbounds getelementptr that is an AddRec with a unit stride
1026  // cannot wrap per definition. The unit stride requirement is checked later.
1027  // A getelementptr without an inbounds attribute and unit stride would have
1028  // to access the pointer value "0" which is undefined behavior in address
1029  // space 0, therefore we can also vectorize this case.
1030  bool IsInBoundsGEP = isInBoundsGep(Ptr);
1031  bool IsNoWrapAddRec = !ShouldCheckWrap ||
1032                        PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
1033                        isNoWrapAddRec(Ptr, AR, PSE, Lp);
1034  if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1035      NullPointerIsDefined(Lp->getHeader()->getParent(),
1036                           PtrTy->getAddressSpace())) {
1037  if (Assume) {
1038      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1039      IsNoWrapAddRec = true;
1040  LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
1041  << "LAA: Pointer: " << *Ptr << "\n"
1042  << "LAA: SCEV: " << *AR << "\n"
1043  << "LAA: Added an overflow assumption\n");
1044  } else {
1045  LLVM_DEBUG(
1046  dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1047  << *Ptr << " SCEV: " << *AR << "\n");
1048  return 0;
1049  }
1050  }
1051 
1052  // Check the step is constant.
1053  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1054 
1055  // Calculate the pointer stride and check if it is constant.
1056  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1057  if (!C) {
1058  LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1059  << " SCEV: " << *AR << "\n");
1060  return 0;
1061  }
1062 
1063  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
1064  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
1065  const APInt &APStepVal = C->getAPInt();
1066 
1067  // Huge step value - give up.
1068  if (APStepVal.getBitWidth() > 64)
1069  return 0;
1070 
1071  int64_t StepVal = APStepVal.getSExtValue();
1072 
1073  // Strided access.
1074  int64_t Stride = StepVal / Size;
1075  int64_t Rem = StepVal % Size;
1076  if (Rem)
1077  return 0;
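  // Illustrative numbers (assumed, not from the code above): an i32 element
  // with Size = 4 and StepVal = 8 gives Stride = 2 with Rem = 0, while
  // StepVal = 6 would leave Rem = 2 and take the early return above.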
1078 
1079  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
1080  // know we can't "wrap around the address space". In case of address space
1081  // zero we know that this won't happen without triggering undefined behavior.
1082  if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1083  (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
1084  PtrTy->getAddressSpace()))) {
1085  if (Assume) {
1086  // We can avoid this case by adding a run-time check.
1087  LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
1088  << "inbounds or in address space 0 may wrap:\n"
1089  << "LAA: Pointer: " << *Ptr << "\n"
1090  << "LAA: SCEV: " << *AR << "\n"
1091  << "LAA: Added an overflow assumption\n");
1092      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1093    } else
1094  return 0;
1095  }
1096 
1097  return Stride;
1098 }
1099 
1100 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
1101                            ScalarEvolution &SE,
1102                            SmallVectorImpl<unsigned> &SortedIndices) {
1103   assert(llvm::all_of(
1104              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1105          "Expected list of pointer operands.");
1106   SmallVector<std::pair<int64_t, Value *>, 4> OffValPairs;
1107   OffValPairs.reserve(VL.size());
1108 
1109  // Walk over the pointers, and map each of them to an offset relative to
1110  // first pointer in the array.
1111  Value *Ptr0 = VL[0];
1112  const SCEV *Scev0 = SE.getSCEV(Ptr0);
1113  Value *Obj0 = GetUnderlyingObject(Ptr0, DL);
1114 
1115  llvm::SmallSet<int64_t, 4> Offsets;
1116  for (auto *Ptr : VL) {
1117  // TODO: Outline this code as a special, more time consuming, version of
1118  // computeConstantDifference() function.
1119  if (Ptr->getType()->getPointerAddressSpace() !=
1120  Ptr0->getType()->getPointerAddressSpace())
1121  return false;
1122  // If a pointer refers to a different underlying object, bail - the
1123  // pointers are by definition incomparable.
1124  Value *CurrObj = GetUnderlyingObject(Ptr, DL);
1125  if (CurrObj != Obj0)
1126  return false;
1127 
1128  const SCEV *Scev = SE.getSCEV(Ptr);
1129  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Scev, Scev0));
1130  // The pointers may not have a constant offset from each other, or SCEV
1131  // may just not be smart enough to figure out they do. Regardless,
1132  // there's nothing we can do.
1133  if (!Diff)
1134  return false;
1135 
1136  // Bail out if a pointer with the same offset has already been seen.
1137  int64_t Offset = Diff->getAPInt().getSExtValue();
1138  if (!Offsets.insert(Offset).second)
1139  return false;
1140  OffValPairs.emplace_back(Offset, Ptr);
1141  }
1142  SortedIndices.clear();
1143  SortedIndices.resize(VL.size());
1144  std::iota(SortedIndices.begin(), SortedIndices.end(), 0);
1145 
1146  // Sort the memory accesses and keep the order of their uses in UseOrder.
1147  llvm::stable_sort(SortedIndices, [&](unsigned Left, unsigned Right) {
1148  return OffValPairs[Left].first < OffValPairs[Right].first;
1149  });
1150 
1151  // Check if the order is consecutive already.
1152  if (llvm::all_of(SortedIndices, [&SortedIndices](const unsigned I) {
1153  return I == SortedIndices[I];
1154  }))
1155  SortedIndices.clear();
1156 
1157  return true;
1158 }
1159 
1160 /// Take the address space operand from the Load/Store instruction.
1161 /// Returns -1 if this is not a valid Load/Store instruction.
1162 static unsigned getAddressSpaceOperand(Value *I) {
1163  if (LoadInst *L = dyn_cast<LoadInst>(I))
1164  return L->getPointerAddressSpace();
1165  if (StoreInst *S = dyn_cast<StoreInst>(I))
1166  return S->getPointerAddressSpace();
1167  return -1;
1168 }
1169 
1170 /// Returns true if the memory operations \p A and \p B are consecutive.
1171 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1172                                ScalarEvolution &SE, bool CheckType) {
1173  Value *PtrA = getLoadStorePointerOperand(A);
1174  Value *PtrB = getLoadStorePointerOperand(B);
1175  unsigned ASA = getAddressSpaceOperand(A);
1176  unsigned ASB = getAddressSpaceOperand(B);
1177 
1178  // Check that the address spaces match and that the pointers are valid.
1179  if (!PtrA || !PtrB || (ASA != ASB))
1180  return false;
1181 
1182  // Make sure that A and B are different pointers.
1183  if (PtrA == PtrB)
1184  return false;
1185 
1186  // Make sure that A and B have the same type if required.
1187  if (CheckType && PtrA->getType() != PtrB->getType())
1188  return false;
1189 
1190  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1191  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1192 
1193  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1194  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1195  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1196 
1197  // Retrieve the address space again as pointer stripping now tracks through
1198  // `addrspacecast`.
1199  ASA = cast<PointerType>(PtrA->getType())->getAddressSpace();
1200  ASB = cast<PointerType>(PtrB->getType())->getAddressSpace();
1201  // Check that the address spaces match and that the pointers are valid.
1202  if (ASA != ASB)
1203  return false;
1204 
1205  IdxWidth = DL.getIndexSizeInBits(ASA);
1206  OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1207  OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1208 
1209  APInt Size(IdxWidth, DL.getTypeStoreSize(Ty));
1210 
1211  // OffsetDelta = OffsetB - OffsetA;
1212  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
1213  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
1214  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
1215  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
1216  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
1217  // Check if they are based on the same pointer. That makes the offsets
1218  // sufficient.
1219  if (PtrA == PtrB)
1220  return OffsetDelta == Size;
1221 
1222  // Compute the necessary base pointer delta to have the necessary final delta
1223  // equal to the size.
1224  // BaseDelta = Size - OffsetDelta;
1225  const SCEV *SizeSCEV = SE.getConstant(Size);
1226  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
1227 
1228  // Otherwise compute the distance with SCEV between the base pointers.
1229  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1230  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1231  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
1232  return X == PtrSCEVB;
1233 }
1234 
1235 MemoryDepChecker::VectorizationSafetyStatus
1236 MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1237   switch (Type) {
1238  case NoDep:
1239  case Forward:
1240  case BackwardVectorizable:
1241  return VectorizationSafetyStatus::Safe;
1242 
1243  case Unknown:
1244  return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1245  case ForwardButPreventsForwarding:
1246  case Backward:
1247  case BackwardVectorizableButPreventsForwarding:
1248  return VectorizationSafetyStatus::Unsafe;
1249  }
1250  llvm_unreachable("unexpected DepType!");
1251 }
1252 
1253 bool MemoryDepChecker::Dependence::isBackward() const {
1254   switch (Type) {
1255  case NoDep:
1256  case Forward:
1257  case ForwardButPreventsForwarding:
1258  case Unknown:
1259  return false;
1260 
1261  case BackwardVectorizable:
1262  case Backward:
1263  case BackwardVectorizableButPreventsForwarding:
1264  return true;
1265  }
1266  llvm_unreachable("unexpected DepType!");
1267 }
1268 
1269 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1270   return isBackward() || Type == Unknown;
1271 }
1272 
1273 bool MemoryDepChecker::Dependence::isForward() const {
1274   switch (Type) {
1275  case Forward:
1276  case ForwardButPreventsForwarding:
1277  return true;
1278 
1279  case NoDep:
1280  case Unknown:
1281  case BackwardVectorizable:
1282  case Backward:
1283  case BackwardVectorizableButPreventsForwarding:
1284  return false;
1285  }
1286  llvm_unreachable("unexpected DepType!");
1287 }
1288 
1289 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1290  uint64_t TypeByteSize) {
1291  // If loads occur at a distance that is not a multiple of a feasible vector
1292  // factor, store-load forwarding does not take place.
1293  // Positive dependences might cause trouble because vectorizing them might
1294  // prevent store-load forwarding, making vectorized code run a lot slower.
1295  // a[i] = a[i-3] ^ a[i-8];
1296  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1297  // hence on your typical architecture store-load forwarding does not take
1298  // place. Vectorizing in such cases does not make sense.
1299  // Store-load forwarding distance.
1300 
1301  // After this many iterations store-to-load forwarding conflicts should not
1302  // cause any slowdowns.
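  // A purely illustrative walk-through with assumed values: for
  // TypeByteSize = 4, conflicts are treated as harmless after 8 * 4 = 32
  // iterations; a Distance of 24 makes the loop below stop at VF = 16
  // (24 % 16 != 0 and 24 / 16 < 32) and clamp MaxVFWithoutSLForwardIssues
  // down to 8.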
1303  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1304  // Maximum vector factor.
1305  uint64_t MaxVFWithoutSLForwardIssues = std::min(
1306  VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
1307 
1308  // Compute the smallest VF at which the store and load would be misaligned.
1309  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1310  VF *= 2) {
1311  // If the number of vector iteration between the store and the load are
1312  // small we could incur conflicts.
1313  if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1314  MaxVFWithoutSLForwardIssues = (VF >>= 1);
1315  break;
1316  }
1317  }
1318 
1319  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1320  LLVM_DEBUG(
1321  dbgs() << "LAA: Distance " << Distance
1322  << " that could cause a store-load forwarding conflict\n");
1323  return true;
1324  }
1325 
1326  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1327  MaxVFWithoutSLForwardIssues !=
1328  VectorizerParams::MaxVectorWidth * TypeByteSize)
1329  MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1330  return false;
1331 }
1332 
1333 void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1334  if (Status < S)
1335  Status = S;
1336 }
1337 
1338 /// Given a non-constant (unknown) dependence-distance \p Dist between two
1339 /// memory accesses, that have the same stride whose absolute value is given
1340 /// in \p Stride, and that have the same type size \p TypeByteSize,
1341 /// in a loop whose takenCount is \p BackedgeTakenCount, check if it is
1342 /// possible to prove statically that the dependence distance is larger
1343 /// than the range that the accesses will travel through the execution of
1344 /// the loop. If so, return true; false otherwise. This is useful for
1345 /// example in loops such as the following (PR31098):
1346 /// for (i = 0; i < D; ++i) {
1347 /// = out[i];
1348 /// out[i+D] =
1349 /// }
1350 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1351                                      const SCEV &BackedgeTakenCount,
1352  const SCEV &Dist, uint64_t Stride,
1353  uint64_t TypeByteSize) {
1354 
1355  // If we can prove that
1356  // (**) |Dist| > BackedgeTakenCount * Step
1357  // where Step is the absolute stride of the memory accesses in bytes,
1358  // then there is no dependence.
1359  //
1360  // Rationale:
1361  // We basically want to check if the absolute distance (|Dist/Step|)
1362  // is >= the loop iteration count (or > BackedgeTakenCount).
1363  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1364  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1365  // that the dependence distance is >= VF; This is checked elsewhere.
1366  // But in some cases we can prune unknown dependence distances early, and
1367  // even before selecting the VF, and without a runtime test, by comparing
1368  // the distance against the loop iteration count. Since the vectorized code
1369  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1370  // also guarantees that distance >= VF.
1371  //
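  // As a concrete illustration (numbers assumed): with Stride = 1,
  // TypeByteSize = 4 and BackedgeTakenCount = 1023, ByteStride = 4 and
  // Product = 1023 * 4 = 4092, so proving |Dist| > 4092 bytes proves the
  // accesses independent.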
1372  const uint64_t ByteStride = Stride * TypeByteSize;
1373  const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
1374  const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
1375 
1376  const SCEV *CastedDist = &Dist;
1377  const SCEV *CastedProduct = Product;
1378  uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
1379  uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
1380 
1381  // The dependence distance can be positive/negative, so we sign extend Dist;
1382  // The multiplication of the absolute stride in bytes and the
1383  // backedgeTakenCount is non-negative, so we zero extend Product.
1384  if (DistTypeSize > ProductTypeSize)
1385  CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1386  else
1387  CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1388 
1389  // Is Dist - (BackedgeTakenCount * Step) > 0 ?
1390  // (If so, then we have proven (**) because |Dist| >= Dist)
1391  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1392  if (SE.isKnownPositive(Minus))
1393  return true;
1394 
1395  // Second try: Is -Dist - (BackedgeTakenCount * Step) > 0 ?
1396  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1397  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1398  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1399  if (SE.isKnownPositive(Minus))
1400  return true;
1401 
1402  return false;
1403 }
1404 
1405 /// Check the dependence for two accesses with the same stride \p Stride.
1406 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1407 /// bytes.
1408 ///
1409 /// \returns true if they are independent.
1410 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1411  uint64_t TypeByteSize) {
1412  assert(Stride > 1 && "The stride must be greater than 1");
1413  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1414  assert(Distance > 0 && "The distance must be non-zero");
1415 
1416  // Skip if the distance is not a multiple of the type byte size.
1417  if (Distance % TypeByteSize)
1418  return false;
1419 
1420  uint64_t ScaledDist = Distance / TypeByteSize;
1421 
1422  // No dependence if the scaled distance is not a multiple of the stride.
1423  // E.g.
1424  // for (i = 0; i < 1024 ; i += 4)
1425  // A[i+2] = A[i] + 1;
1426  //
1427  // Two accesses in memory (scaled distance is 2, stride is 4):
1428  // | A[0] | | | | A[4] | | | |
1429  // | | | A[2] | | | | A[6] | |
1430  //
1431  // E.g.
1432  // for (i = 0; i < 1024 ; i += 3)
1433  // A[i+4] = A[i] + 1;
1434  //
1435  // Two accesses in memory (scaled distance is 4, stride is 3):
1436  // | A[0] | | | A[3] | | | A[6] | | |
1437  // | | | | | A[4] | | | A[7] | |
1438  return ScaledDist % Stride;
1439 }
1440 
1441 MemoryDepChecker::Dependence::DepType
1442 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
1443  const MemAccessInfo &B, unsigned BIdx,
1444  const ValueToValueMap &Strides) {
1445  assert (AIdx < BIdx && "Must pass arguments in program order");
1446 
1447  Value *APtr = A.getPointer();
1448  Value *BPtr = B.getPointer();
1449  bool AIsWrite = A.getInt();
1450  bool BIsWrite = B.getInt();
1451 
1452  // Two reads are independent.
1453  if (!AIsWrite && !BIsWrite)
1454  return Dependence::NoDep;
1455 
1456  // We cannot check pointers in different address spaces.
1457  if (APtr->getType()->getPointerAddressSpace() !=
1458  BPtr->getType()->getPointerAddressSpace())
1459  return Dependence::Unknown;
1460 
1461  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
1462  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
1463 
1464  const SCEV *Src = PSE.getSCEV(APtr);
1465  const SCEV *Sink = PSE.getSCEV(BPtr);
1466 
1467  // If the induction step is negative we have to invert source and sink of the
1468  // dependence.
1469  if (StrideAPtr < 0) {
1470  std::swap(APtr, BPtr);
1471  std::swap(Src, Sink);
1472  std::swap(AIsWrite, BIsWrite);
1473  std::swap(AIdx, BIdx);
1474  std::swap(StrideAPtr, StrideBPtr);
1475  }
1476 
1477  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
1478 
1479  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1480  << "(Induction step: " << StrideAPtr << ")\n");
1481  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
1482  << *InstMap[BIdx] << ": " << *Dist << "\n");
1483 
1484  // Need accesses with constant stride. We don't want to vectorize
1485  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
1486  // the address space.
1487  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1488  LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1489  return Dependence::Unknown;
1490  }
1491 
1492  Type *ATy = APtr->getType()->getPointerElementType();
1493  Type *BTy = BPtr->getType()->getPointerElementType();
1494  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
1495  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1496  uint64_t Stride = std::abs(StrideAPtr);
1497  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
1498  if (!C) {
1499  if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
1500  isSafeDependenceDistance(DL, *(PSE.getSE()),
1501  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
1502  TypeByteSize))
1503  return Dependence::NoDep;
1504 
1505  LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
1506  FoundNonConstantDistanceDependence = true;
1507  return Dependence::Unknown;
1508  }
1509 
1510  const APInt &Val = C->getAPInt();
1511  int64_t Distance = Val.getSExtValue();
1512 
1513  // Attempt to prove strided accesses independent.
1514  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
1515  areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
1516  LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
1517  return Dependence::NoDep;
1518  }
1519 
1520  // Negative distances are not plausible dependencies.
1521  if (Val.isNegative()) {
1522  bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1523  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1524  (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
1525  ATy != BTy)) {
1526  LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
1527  return Dependence::ForwardButPreventsForwarding;
1528  }
1529 
1530  LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
1531  return Dependence::Forward;
1532  }
1533 
1534  // Write to the same location with the same size.
1535  // Could be improved to assert type sizes are the same (i32 == float, etc).
1536  if (Val == 0) {
1537  if (ATy == BTy)
1538  return Dependence::Forward;
1539  LLVM_DEBUG(
1540  dbgs() << "LAA: Zero dependence difference but different types\n");
1541  return Dependence::Unknown;
1542  }
1543 
1544  assert(Val.isStrictlyPositive() && "Expect a positive value");
1545 
1546  if (ATy != BTy) {
1547  LLVM_DEBUG(
1548  dbgs()
1549  << "LAA: ReadWrite-Write positive dependency with different types\n");
1550  return Dependence::Unknown;
1551  }
1552 
1553  // Bail out early if passed-in parameters make vectorization not feasible.
1554  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
1555                           VectorizerParams::VectorizationFactor : 1);
1556  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
1557                           VectorizerParams::VectorizationInterleave : 1);
1558  // The minimum number of iterations for a vectorized/unrolled version.
1559  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1560 
1561  // It's not vectorizable if the distance is smaller than the minimum distance
1562  // needed for a vectorized/unrolled version. Vectorizing one iteration in
1563  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
1564  // TypeByteSize (no need to add the last gap distance).
1565  //
1566  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1567  // foo(int *A) {
1568  // int *B = (int *)((char *)A + 14);
1569  // for (i = 0 ; i < 1024 ; i += 2)
1570  // B[i] = A[i] + 1;
1571  // }
1572  //
1573  // Two accesses in memory (stride is 2):
1574  // | A[0] | | A[2] | | A[4] | | A[6] | |
1575  // | B[0] | | B[2] | | B[4] |
1576  //
1577  // Distance needed for vectorizing iterations except the last iteration:
1578  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
1579  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
1580  //
1581  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
1582  // 12, which is less than the distance of 14.
1583  //
1584  // If MinNumIter is 4 (say, if a user forces the vectorization factor to be
1585  // 4), the minimum distance needed is 28, which is greater than the distance
1586  // of 14. It is not safe to do vectorization.
1587  uint64_t MinDistanceNeeded =
1588  TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1589  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
1590  LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
1591  << Distance << '\n');
1592  return Dependence::Backward;
1593  }
1594 
1595  // Unsafe if the minimum distance needed is greater than max safe distance.
1596  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1597  LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
1598  << MinDistanceNeeded << " size in bytes");
1599  return Dependence::Backward;
1600  }
1601 
1602  // Positive distance bigger than max vectorization factor.
1603  // FIXME: Should use max factor instead of max distance in bytes, which
1604  // cannot handle different types.
1605  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
1606  // void foo (int *A, char *B) {
1607  // for (unsigned i = 0; i < 1024; i++) {
1608  // A[i+2] = A[i] + 1;
1609  // B[i+2] = B[i] + 1;
1610  // }
1611  // }
1612  //
1613  // This case is currently unsafe according to the max safe distance. If we
1614  // analyze the two accesses on array B, the max safe dependence distance
1615  // is 2. Then we analyze the accesses on array A, where the minimum distance
1616  // needed is 8, which is greater than 2, so vectorization is forbidden. But
1617  // actually both A and B could be vectorized by 2 iterations.
1618  MaxSafeDepDistBytes =
1619  std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
1620 
1621  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
1622  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
1623  couldPreventStoreLoadForward(Distance, TypeByteSize))
1624  return Dependence::BackwardVectorizableButPreventsForwarding;
1625 
1626  uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
1627  LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
1628  << " with max VF = " << MaxVF << '\n');
1629  uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1630  MaxSafeRegisterWidth = std::min(MaxSafeRegisterWidth, MaxVFInBits);
1631  return Dependence::BackwardVectorizable;
1632 }
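// Worked example for the tail of isDependent (illustrative numbers, not from
// the original source): with TypeByteSize = 4, Stride = 2 and a constant
// distance of 32 bytes, MinNumIter = 2 gives MinDistanceNeeded =
// 4 * 2 * 1 + 4 = 12 <= 32, so the dependence is BackwardVectorizable.
// Assuming no smaller distance was recorded earlier, MaxSafeDepDistBytes
// becomes 32, MaxVF = 32 / (4 * 2) = 4 and MaxSafeRegisterWidth is capped at
// 4 * 4 * 8 = 128 bits.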
1633 
1634 bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
1635                                    MemAccessInfoList &CheckDeps,
1636  const ValueToValueMap &Strides) {
1637 
1638  MaxSafeDepDistBytes = -1;
1639  SmallPtrSet<MemAccessInfo, 8> Visited;
1640  for (MemAccessInfo CurAccess : CheckDeps) {
1641  if (Visited.count(CurAccess))
1642  continue;
1643 
1644  // Get the relevant memory access set.
1645  EquivalenceClasses<MemAccessInfo>::iterator I =
1646      AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
1647 
1648  // Check accesses within this set.
1649  EquivalenceClasses<MemAccessInfo>::member_iterator AI =
1650      AccessSets.member_begin(I);
1651  EquivalenceClasses<MemAccessInfo>::member_iterator AE =
1652      AccessSets.member_end();
1653 
1654  // Check every access pair.
1655  while (AI != AE) {
1656  Visited.insert(*AI);
1657  bool AIIsWrite = AI->getInt();
1658  // Check loads only against subsequent members of the equivalence class,
1659  // but check stores also against other stores to the same address.
1660  EquivalenceClasses<MemAccessInfo>::member_iterator OI =
1661      (AIIsWrite ? AI : std::next(AI));
1662  while (OI != AE) {
1663  // Check every accessing instruction pair in program order.
1664  for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
1665  I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
1666  // Scan all accesses of another equivalence class, but only the next
1667  // accesses of the same equivalence class.
1668  for (std::vector<unsigned>::iterator
1669  I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
1670  I2E = (OI == AI ? I1E : Accesses[*OI].end());
1671  I2 != I2E; ++I2) {
1672  auto A = std::make_pair(&*AI, *I1);
1673  auto B = std::make_pair(&*OI, *I2);
1674 
1675  assert(*I1 != *I2);
1676  if (*I1 > *I2)
1677  std::swap(A, B);
1678 
1679  Dependence::DepType Type =
1680      isDependent(*A.first, A.second, *B.first, B.second, Strides);
1681  mergeInStatus(Dependence::isSafeForVectorization(Type));
1682 
1683  // Gather dependences unless we accumulated MaxDependences
1684  // dependences. In that case return as soon as we find the first
1685  // unsafe dependence. This puts a limit on this quadratic
1686  // algorithm.
1687  if (RecordDependences) {
1688  if (Type != Dependence::NoDep)
1689  Dependences.push_back(Dependence(A.second, B.second, Type));
1690 
1691  if (Dependences.size() >= MaxDependences) {
1692  RecordDependences = false;
1693  Dependences.clear();
1694  LLVM_DEBUG(dbgs()
1695  << "Too many dependences, stopped recording\n");
1696  }
1697  }
1698  if (!RecordDependences && !isSafeForVectorization())
1699  return false;
1700  }
1701  ++OI;
1702  }
1703  AI++;
1704  }
1705  }
1706 
1707  LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
1708  return isSafeForVectorization();
1709 }
1710 
1711 SmallVector<Instruction *, 4>
1712 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
1713  MemAccessInfo Access(Ptr, isWrite);
1714  auto &IndexVector = Accesses.find(Access)->second;
1715 
1716  SmallVector<Instruction *, 4> Insts;
1717  transform(IndexVector,
1718  std::back_inserter(Insts),
1719  [&](unsigned Idx) { return this->InstMap[Idx]; });
1720  return Insts;
1721 }
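// Illustrative usage sketch (hypothetical caller, not part of this file):
//   for (Instruction *I :
//        DepChecker.getInstructionsForAccess(Ptr, /*isWrite=*/true))
//     dbgs() << "writes through Ptr: " << *I << "\n";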
1722 
1723 const char *MemoryDepChecker::Dependence::DepName[] = {
1724  "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
1725  "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
1726 
1727 void MemoryDepChecker::Dependence::print(
1728     raw_ostream &OS, unsigned Depth,
1729  const SmallVectorImpl<Instruction *> &Instrs) const {
1730  OS.indent(Depth) << DepName[Type] << ":\n";
1731  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
1732  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
1733 }
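// For a backward dependence between two i32 accesses, the output produced by
// the routine above might look roughly like (illustrative, not verbatim):
//   Backward:
//       %v = load i32, i32* %gep.a, align 4 ->
//       store i32 %add, i32* %gep.a.plus.2, align 4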
1734 
1735 bool LoopAccessInfo::canAnalyzeLoop() {
1736  // We need to have a loop header.
1737  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
1738  << TheLoop->getHeader()->getParent()->getName() << ": "
1739  << TheLoop->getHeader()->getName() << '\n');
1740 
1741  // We can only analyze innermost loops.
1742  if (!TheLoop->empty()) {
1743  LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
1744  recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
1745  return false;
1746  }
1747 
1748  // We must have a single backedge.
1749  if (TheLoop->getNumBackEdges() != 1) {
1750  LLVM_DEBUG(
1751  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1752  recordAnalysis("CFGNotUnderstood")
1753  << "loop control flow is not understood by analyzer";
1754  return false;
1755  }
1756 
1757  // We must have a single exiting block.
1758  if (!TheLoop->getExitingBlock()) {
1759  LLVM_DEBUG(
1760  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1761  recordAnalysis("CFGNotUnderstood")
1762  << "loop control flow is not understood by analyzer";
1763  return false;
1764  }
1765 
1766  // We only handle bottom-tested loops, i.e. loops in which the condition is
1767  // checked at the end of each iteration. With that we can assume that all
1768  // instructions in the loop are executed the same number of times.
1769  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
1770  LLVM_DEBUG(
1771  dbgs() << "LAA: loop control flow is not understood by analyzer\n");
1772  recordAnalysis("CFGNotUnderstood")
1773  << "loop control flow is not understood by analyzer";
1774  return false;
1775  }
1776 
1777  // ScalarEvolution needs to be able to find the exit count.
1778  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1779  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
1780  recordAnalysis("CantComputeNumberOfIterations")
1781  << "could not determine number of loop iterations";
1782  LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
1783  return false;
1784  }
1785 
1786  return true;
1787 }
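// As an illustration (not from the original source), a loop such as
//
//   for (i = 0; i < n; i++) {
//     if (A[i] < 0)
//       break;          // second exiting block
//     A[i] += 1;
//   }
//
// has two exiting blocks, so getExitingBlock() returns null and canAnalyzeLoop
// bails out with the "CFGNotUnderstood" remark.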
1788 
1789 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1790  const TargetLibraryInfo *TLI,
1791  DominatorTree *DT) {
1792  typedef SmallPtrSet<Value*, 16> ValueSet;
1793 
1794  // Holds the Load and Store instructions.
1795  SmallVector<LoadInst *, 16> Loads;
1796  SmallVector<StoreInst *, 16> Stores;
1797 
1798  // Holds all the different accesses in the loop.
1799  unsigned NumReads = 0;
1800  unsigned NumReadWrites = 0;
1801 
1802  bool HasComplexMemInst = false;
1803 
1804  // A runtime check is only legal to insert if there are no convergent calls.
1805  HasConvergentOp = false;
1806 
1807  PtrRtChecking->Pointers.clear();
1808  PtrRtChecking->Need = false;
1809 
1810  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1811 
1812  // For each block.
1813  for (BasicBlock *BB : TheLoop->blocks()) {
1814  // Scan the BB and collect legal loads and stores. Also detect any
1815  // convergent instructions.
1816  for (Instruction &I : *BB) {
1817  if (auto *Call = dyn_cast<CallBase>(&I)) {
1818  if (Call->isConvergent())
1819  HasConvergentOp = true;
1820  }
1821 
1822  // Once we have found both a non-vectorizable memory instruction and a
1823  // convergent operation in this loop, there is no reason to continue the search.
1824  if (HasComplexMemInst && HasConvergentOp) {
1825  CanVecMem = false;
1826  return;
1827  }
1828 
1829  // Avoid hitting recordAnalysis multiple times.
1830  if (HasComplexMemInst)
1831  continue;
1832 
1833  // If this is a load, save it. If this instruction can read from memory
1834  // but is not a load, then we quit. Notice that we don't handle function
1835  // calls that read or write.
1836  if (I.mayReadFromMemory()) {
1837  // Many math library functions read the rounding mode. We will only
1838  // vectorize a loop if it contains known function calls that don't set
1839  // the flag. Therefore, it is safe to ignore this read from memory.
1840  auto *Call = dyn_cast<CallInst>(&I);
1841  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1842  continue;
1843 
1844  // If the function has an explicit vectorized counterpart, we can safely
1845  // assume that it can be vectorized.
1846  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1847  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1848  continue;
1849 
1850  auto *Ld = dyn_cast<LoadInst>(&I);
1851  if (!Ld) {
1852  recordAnalysis("CantVectorizeInstruction", Ld)
1853  << "instruction cannot be vectorized";
1854  HasComplexMemInst = true;
1855  continue;
1856  }
1857  if (!Ld->isSimple() && !IsAnnotatedParallel) {
1858  recordAnalysis("NonSimpleLoad", Ld)
1859  << "read with atomic ordering or volatile read";
1860  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1861  HasComplexMemInst = true;
1862  continue;
1863  }
1864  NumLoads++;
1865  Loads.push_back(Ld);
1866  DepChecker->addAccess(Ld);
1867  if (EnableMemAccessVersioning)
1868    collectStridedAccess(Ld);
1869  continue;
1870  }
1871 
1872  // Save 'store' instructions. Abort if other instructions write to memory.
1873  if (I.mayWriteToMemory()) {
1874  auto *St = dyn_cast<StoreInst>(&I);
1875  if (!St) {
1876  recordAnalysis("CantVectorizeInstruction", St)
1877  << "instruction cannot be vectorized";
1878  HasComplexMemInst = true;
1879  continue;
1880  }
1881  if (!St->isSimple() && !IsAnnotatedParallel) {
1882  recordAnalysis("NonSimpleStore", St)
1883  << "write with atomic ordering or volatile write";
1884  LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1885  HasComplexMemInst = true;
1886  continue;
1887  }
1888  NumStores++;
1889  Stores.push_back(St);
1890  DepChecker->addAccess(St);
1891  if (EnableMemAccessVersioning)
1892    collectStridedAccess(St);
1893  }
1894  } // Next instr.
1895  } // Next block.
1896 
1897  if (HasComplexMemInst) {
1898  CanVecMem = false;
1899  return;
1900  }
1901 
1902  // Now we have two lists that hold the loads and the stores.
1903  // Next, we find the pointers that they use.
1904 
1905  // Check if we see any stores. If there are no stores, then we don't
1906  // care if the pointers are *restrict*.
1907  if (!Stores.size()) {
1908  LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1909  CanVecMem = true;
1910  return;
1911  }
1912 
1913  MemoryDepChecker::DepCandidates DependentAccesses;
1914  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1915  TheLoop, AA, LI, DependentAccesses, *PSE);
1916 
1917  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1918  // multiple times on the same object. If the ptr is accessed twice, once
1919  // for read and once for write, it will only appear once (on the write
1920  // list). This is okay, since we are going to check for conflicts between
1921  // writes and between reads and writes, but not between reads and reads.
1922  ValueSet Seen;
1923 
1924  // Record uniform store addresses to identify if we have multiple stores
1925  // to the same address.
1926  ValueSet UniformStores;
1927 
1928  for (StoreInst *ST : Stores) {
1929  Value *Ptr = ST->getPointerOperand();
1930 
1931  if (isUniform(Ptr))
1932  HasDependenceInvolvingLoopInvariantAddress |=
1933  !UniformStores.insert(Ptr).second;
1934 
1935  // If we did *not* see this pointer before, insert it to the read-write
1936  // list. At this phase it is only a 'write' list.
1937  if (Seen.insert(Ptr).second) {
1938  ++NumReadWrites;
1939 
1940  MemoryLocation Loc = MemoryLocation::get(ST);
1941  // The TBAA metadata could have a control dependency on the predication
1942  // condition, so we cannot rely on it when determining whether or not we
1943  // need runtime pointer checks.
1944  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1945  Loc.AATags.TBAA = nullptr;
1946 
1947  Accesses.addStore(Loc);
1948  }
1949  }
1950 
1951  if (IsAnnotatedParallel) {
1952  LLVM_DEBUG(
1953  dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
1954  << "checks.\n");
1955  CanVecMem = true;
1956  return;
1957  }
1958 
1959  for (LoadInst *LD : Loads) {
1960  Value *Ptr = LD->getPointerOperand();
1961  // If we did *not* see this pointer before, insert it to the
1962  // read list. If we *did* see it before, then it is already in
1963  // the read-write list. This allows us to vectorize expressions
1964  // such as A[i] += x; Because the address of A[i] is a read-write
1965  // pointer. This only works if the index of A[i] is consecutive.
1966  // If the address of i is unknown (for example A[B[i]]) then we may
1967  // read a few words, modify, and write a few words, and some of the
1968  // words may be written to the same address.
1969  bool IsReadOnlyPtr = false;
1970  if (Seen.insert(Ptr).second ||
1971  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1972  ++NumReads;
1973  IsReadOnlyPtr = true;
1974  }
1975 
1976  // See if there is an unsafe dependency between a load to a uniform address and
1977  // store to the same uniform address.
1978  if (UniformStores.count(Ptr)) {
1979  LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
1980  "load and uniform store to the same address!\n");
1981  HasDependenceInvolvingLoopInvariantAddress = true;
1982  }
1983 
1984  MemoryLocation Loc = MemoryLocation::get(LD);
1985  // The TBAA metadata could have a control dependency on the predication
1986  // condition, so we cannot rely on it when determining whether or not we
1987  // need runtime pointer checks.
1988  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1989  Loc.AATags.TBAA = nullptr;
1990 
1991  Accesses.addLoad(Loc, IsReadOnlyPtr);
1992  }
1993 
1994  // If we write (or read-write) to a single destination and there are no
1995  // other reads in this loop then it is safe to vectorize.
1996  if (NumReadWrites == 1 && NumReads == 0) {
1997  LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1998  CanVecMem = true;
1999  return;
2000  }
2001 
2002  // Build dependence sets and check whether we need a runtime pointer bounds
2003  // check.
2004  Accesses.buildDependenceSets();
2005 
2006  // Find pointers with computable bounds. We are going to use this information
2007  // to place a runtime bound check.
2008  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
2009  TheLoop, SymbolicStrides);
2010  if (!CanDoRTIfNeeded) {
2011  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
2012  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2013  << "the array bounds.\n");
2014  CanVecMem = false;
2015  return;
2016  }
2017 
2018  LLVM_DEBUG(
2019  dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2020 
2021  CanVecMem = true;
2022  if (Accesses.isDependencyCheckNeeded()) {
2023  LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2024  CanVecMem = DepChecker->areDepsSafe(
2025  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2026  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2027 
2028  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2029  LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2030 
2031  // Clear the dependency checks. We assume they are not needed.
2032  Accesses.resetDepChecks(*DepChecker);
2033 
2034  PtrRtChecking->reset();
2035  PtrRtChecking->Need = true;
2036 
2037  auto *SE = PSE->getSE();
2038  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
2039  SymbolicStrides, true);
2040 
2041  // Check that we found the bounds for the pointer.
2042  if (!CanDoRTIfNeeded) {
2043  recordAnalysis("CantCheckMemDepsAtRunTime")
2044  << "cannot check memory dependencies at runtime";
2045  LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2046  CanVecMem = false;
2047  return;
2048  }
2049 
2050  CanVecMem = true;
2051  }
2052  }
2053 
2054  if (HasConvergentOp) {
2055  recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2056  << "cannot add control dependency to convergent operation";
2057  LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2058  "would be needed with a convergent operation\n");
2059  CanVecMem = false;
2060  return;
2061  }
2062 
2063  if (CanVecMem)
2064  LLVM_DEBUG(
2065  dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
2066  << (PtrRtChecking->Need ? "" : " don't")
2067  << " need runtime memory checks.\n");
2068  else {
2069  recordAnalysis("UnsafeMemDep")
2070  << "unsafe dependent memory operations in loop. Use "
2071  "#pragma loop distribute(enable) to allow loop distribution "
2072  "to attempt to isolate the offending operations into a separate "
2073  "loop";
2074  LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2075  }
2076 }
2077 
2078 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2079                                            DominatorTree *DT) {
2080  assert(TheLoop->contains(BB) && "Unknown block used");
2081 
2082  // Blocks that do not dominate the latch need predication.
2083  BasicBlock* Latch = TheLoop->getLoopLatch();
2084  return !DT->dominates(BB, Latch);
2085 }
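// Illustrative example (not part of the original source): in
//
//   for (i = 0; i < n; i++)
//     if (C[i])
//       A[i] = 0;
//
// the block containing the store does not dominate the loop latch, so it needs
// predication; analyzeLoop above therefore drops its TBAA tag before adding
// the access.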
2086 
2087 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
2088  Instruction *I) {
2089  assert(!Report && "Multiple reports generated");
2090 
2091  Value *CodeRegion = TheLoop->getHeader();
2092  DebugLoc DL = TheLoop->getStartLoc();
2093 
2094  if (I) {
2095  CodeRegion = I->getParent();
2096  // If there is no debug location attached to the instruction, fall back to
2097  // using the loop's.
2098  if (I->getDebugLoc())
2099  DL = I->getDebugLoc();
2100  }
2101 
2102  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2103  CodeRegion);
2104  return *Report;
2105 }
2106 
2107 bool LoopAccessInfo::isUniform(Value *V) const {
2108  auto *SE = PSE->getSE();
2109  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
2110  // never considered uniform.
2111  // TODO: Is this really what we want? Even without FP SCEV, we may want some
2112  // trivially loop-invariant FP values to be considered uniform.
2113  if (!SE->isSCEVable(V->getType()))
2114  return false;
2115  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
2116 }
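// Illustrative example (not from the original source): for a pointer such as
// &A[n] where n is defined outside the loop, the SCEV is loop invariant and
// isUniform returns true; for &A[i] with i the loop's induction variable it
// returns false.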
2117 
2118 // FIXME: this function is currently a duplicate of the one in
2119 // LoopVectorize.cpp.
2120 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
2121                                  Instruction *Loc) {
2122  if (FirstInst)
2123  return FirstInst;
2124  if (Instruction *I = dyn_cast<Instruction>(V))
2125  return I->getParent() == Loc->getParent() ? I : nullptr;
2126  return nullptr;
2127 }
2128 
2129 namespace {
2130 
2131 /// IR Values for the lower and upper bounds of a pointer evolution. We
2132 /// need to use value-handles because SCEV expansion can invalidate previously
2133 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
2134 /// a previous one.
2135 struct PointerBounds {
2136  TrackingVH<Value> Start;
2137  TrackingVH<Value> End;
2138 };
2139 
2140 } // end anonymous namespace
2141 
2142 /// Expand code for the lower and upper bound of the pointer group \p CG
2143 /// in \p TheLoop. \return the values for the bounds.
2144 static PointerBounds
2145 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
2146              Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
2147  const RuntimePointerChecking &PtrRtChecking) {
2148  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
2149  const SCEV *Sc = SE->getSCEV(Ptr);
2150 
2151  unsigned AS = Ptr->getType()->getPointerAddressSpace();
2152  LLVMContext &Ctx = Loc->getContext();
2153 
2154  // Use this type for pointer arithmetic.
2155  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
2156 
2157  if (SE->isLoopInvariant(Sc, TheLoop)) {
2158  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:"
2159  << *Ptr << "\n");
2160  // Ptr could be in the loop body. If so, expand a new one at the correct
2161  // location.
2162  Instruction *Inst = dyn_cast<Instruction>(Ptr);
2163  Value *NewPtr = (Inst && TheLoop->contains(Inst))
2164  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
2165  : Ptr;
2166  // We must return a half-open range, which means incrementing Sc.
2167  const SCEV *ScPlusOne = SE->getAddExpr(Sc, SE->getOne(PtrArithTy));
2168  Value *NewPtrPlusOne = Exp.expandCodeFor(ScPlusOne, PtrArithTy, Loc);
2169  return {NewPtr, NewPtrPlusOne};
2170  } else {
2171  Value *Start = nullptr, *End = nullptr;
2172  LLVM_DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
2173  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
2174  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
2175  LLVM_DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High
2176  << "\n");
2177  return {Start, End};
2178  }
2179 }
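// Illustrative note (an editorial addition): for a loop-invariant pointer %p
// the code above emits the half-open range [%p, %p + 1) in i8* units, i.e. a
// one-byte interval anchored at the invariant address, while for a strided
// pointer the bounds are expanded from the group's Low/High SCEVs.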
2180 
2181 /// Turns a collection of checks into a collection of expanded upper and
2182 /// lower bounds for both pointers in the check.
2183 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
2184     const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
2185     Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
2186     const RuntimePointerChecking &PtrRtChecking) {
2187   SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
2188 
2189  // Here we're relying on the SCEV Expander's cache to only emit code for the
2190  // same bounds once.
2191  transform(
2192  PointerChecks, std::back_inserter(ChecksWithBounds),
2193      [&](const RuntimePointerChecking::PointerCheck &Check) {
2194        PointerBounds
2195  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
2196  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
2197  return std::make_pair(First, Second);
2198  });
2199 
2200  return ChecksWithBounds;
2201 }
2202 
2203 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
2204  Instruction *Loc,
2205     const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
2206     const {
2207  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2208  auto *SE = PSE->getSE();
2209  SCEVExpander Exp(*SE, DL, "induction");
2210  auto ExpandedChecks =
2211  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
2212 
2213  LLVMContext &Ctx = Loc->getContext();
2214  Instruction *FirstInst = nullptr;
2215  IRBuilder<> ChkBuilder(Loc);
2216  // Our instructions might fold to a constant.
2217  Value *MemoryRuntimeCheck = nullptr;
2218 
2219  for (const auto &Check : ExpandedChecks) {
2220  const PointerBounds &A = Check.first, &B = Check.second;
2221  // Check if two pointers (A and B) conflict where conflict is computed as:
2222  // start(A) <= end(B) && start(B) <= end(A)
2223  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
2224  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
2225 
2226  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
2227  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
2228  "Trying to bounds check pointers with different address spaces");
2229 
2230  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
2231  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
2232 
2233  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
2234  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
2235  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
2236  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
2237 
2238  // [A|B].Start points to the first accessed byte under base [A|B].
2239  // [A|B].End points to the last accessed byte, plus one.
2240  // There is no conflict when the intervals are disjoint:
2241  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
2242  //
2243  // bound0 = (B.Start < A.End)
2244  // bound1 = (A.Start < B.End)
2245  // IsConflict = bound0 & bound1
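  // Illustrative numbers (not from the original source): if A covers
  // [0x100, 0x140) and B covers [0x120, 0x160), then bound0 = (0x120 < 0x140)
  // and bound1 = (0x100 < 0x160) are both true, so IsConflict is true; for
  // disjoint ranges at least one of the two bounds is false.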
2246  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
2247  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
2248  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
2249  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
2250  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
2251  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2252  if (MemoryRuntimeCheck) {
2253  IsConflict =
2254  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
2255  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
2256  }
2257  MemoryRuntimeCheck = IsConflict;
2258  }
2259 
2260  if (!MemoryRuntimeCheck)
2261  return std::make_pair(nullptr, nullptr);
2262 
2263  // We have to do this trickery because the IRBuilder might fold the check to a
2264  // constant expression in which case there is no Instruction anchored in
2265  // the block.
2266  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
2267  ConstantInt::getTrue(Ctx));
2268  ChkBuilder.Insert(Check, "memcheck.conflict");
2269  FirstInst = getFirstInst(FirstInst, Check, Loc);
2270  return std::make_pair(FirstInst, Check);
2271 }
2272 
2273 std::pair<Instruction *, Instruction *>
2274 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
2275   if (!PtrRtChecking->Need)
2276  return std::make_pair(nullptr, nullptr);
2277 
2278  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
2279 }
2280 
2281 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2282  Value *Ptr = nullptr;
2283  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2284  Ptr = LI->getPointerOperand();
2285  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2286  Ptr = SI->getPointerOperand();
2287  else
2288  return;
2289 
2290  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2291  if (!Stride)
2292  return;
2293 
2294  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2295  "versioning:");
2296  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2297 
2298  // Avoid adding the "Stride == 1" predicate when we know that
2299  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2300  // or zero iteration loop, as Trip-Count <= Stride == 1.
2301  //
2302  // TODO: We are currently not making a very informed decision on when it is
2303  // beneficial to apply stride versioning. It might make more sense that the
2304  // users of this analysis (such as the vectorizer) will trigger it, based on
2305  // their specific cost considerations; For example, in cases where stride
2306  // versioning does not help resolving memory accesses/dependences, the
2307  // vectorizer should evaluate the cost of the runtime test, and the benefit
2308  // of various possible stride specializations, considering the alternatives
2309  // of using gather/scatters (if available).
2310 
2311  const SCEV *StrideExpr = PSE->getSCEV(Stride);
2312  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
2313 
2314  // Match the types so we can compare the stride and the BETakenCount.
2315  // The Stride can be positive/negative, so we sign extend Stride;
2316  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
2317  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
2318  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
2319  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
2320  const SCEV *CastedStride = StrideExpr;
2321  const SCEV *CastedBECount = BETakenCount;
2322  ScalarEvolution *SE = PSE->getSE();
2323  if (BETypeSize >= StrideTypeSize)
2324  CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
2325  else
2326  CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
2327  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2328  // Since TripCount == BackEdgeTakenCount + 1, checking:
2329  // "Stride >= TripCount" is equivalent to checking:
2330  // Stride - BETakenCount > 0
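  // Illustrative numbers (not from the original source): with a backedge-taken
  // count of 7 (trip count 8) and a stride known to be at least 8,
  // Stride - BETakenCount >= 1 is known positive, so a "Stride == 1" predicate
  // would only describe a loop executing at most once and versioning is skipped.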
2331  if (SE->isKnownPositive(StrideMinusBETaken)) {
2332  LLVM_DEBUG(
2333  dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2334  "Stride==1 predicate will imply that the loop executes "
2335  "at most once.\n");
2336  return;
2337  }
2338  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.");
2339 
2340  SymbolicStrides[Ptr] = Stride;
2341  StrideSet.insert(Stride);
2342 }
2343 
2344 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2345                                const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2346  DominatorTree *DT, LoopInfo *LI)
2347  : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2348  PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
2349  DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2350  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2351  HasConvergentOp(false),
2352  HasDependenceInvolvingLoopInvariantAddress(false) {
2353  if (canAnalyzeLoop())
2354  analyzeLoop(AA, LI, TLI, DT);
2355 }
2356 
2357 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2358  if (CanVecMem) {
2359  OS.indent(Depth) << "Memory dependences are safe";
2360  if (MaxSafeDepDistBytes != -1ULL)
2361  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2362  << " bytes";
2363  if (PtrRtChecking->Need)
2364  OS << " with run-time checks";
2365  OS << "\n";
2366  }
2367 
2368  if (HasConvergentOp)
2369  OS.indent(Depth) << "Has convergent operation in loop\n";
2370 
2371  if (Report)
2372  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2373 
2374  if (auto *Dependences = DepChecker->getDependences()) {
2375  OS.indent(Depth) << "Dependences:\n";
2376  for (auto &Dep : *Dependences) {
2377  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2378  OS << "\n";
2379  }
2380  } else
2381  OS.indent(Depth) << "Too many dependences, not recorded\n";
2382 
2383  // List the pairs of accesses that need run-time checks to prove independence.
2384  PtrRtChecking->print(OS, Depth);
2385  OS << "\n";
2386 
2387  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
2388  << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
2389  << "found in loop.\n";
2390 
2391  OS.indent(Depth) << "SCEV assumptions:\n";
2392  PSE->getUnionPredicate().print(OS, Depth);
2393 
2394  OS << "\n";
2395 
2396  OS.indent(Depth) << "Expressions re-written:\n";
2397  PSE->print(OS, Depth);
2398 }
2399 
2400 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2401   auto &LAI = LoopAccessInfoMap[L];
2402 
2403  if (!LAI)
2404  LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2405 
2406  return *LAI.get();
2407 }
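// Illustrative usage from a hypothetical legacy pass (not part of this file):
//   auto &LAA = getAnalysis<LoopAccessLegacyAnalysis>();
//   const LoopAccessInfo &LAI = LAA.getInfo(L);
//   if (LAI.canVectorizeMemory() && !LAI.getRuntimePointerChecking()->Need)
//     ...; // memory accesses are provably safe without run-time checks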
2408 
2409 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *) const {
2410  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2411 
2412  for (Loop *TopLevelLoop : *LI)
2413  for (Loop *L : depth_first(TopLevelLoop)) {
2414  OS.indent(2) << L->getHeader()->getName() << ":\n";
2415  auto &LAI = LAA.getInfo(L);
2416  LAI.print(OS, 4);
2417  }
2418 }
2419 
2420 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2421   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2422  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2423  TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
2424  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2425  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2426  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2427 
2428  return false;
2429 }
2430 
2431 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2432   AU.addRequired<ScalarEvolutionWrapperPass>();
2433   AU.addRequired<AAResultsWrapperPass>();
2434   AU.addRequired<DominatorTreeWrapperPass>();
2435   AU.addRequired<LoopInfoWrapperPass>();
2436 
2437  AU.setPreservesAll();
2438 }
2439 
2441 static const char laa_name[] = "Loop Access Analysis";
2442 #define LAA_NAME "loop-accesses"
2443 
2444 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2445 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2446 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2447 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2448 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2449 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2450 
2451 AnalysisKey LoopAccessAnalysis::Key;
2452 
2453 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2454                                        LoopStandardAnalysisResults &AR) {
2455  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2456 }
2457 
2458 namespace llvm {
2459 
2460   Pass *createLAAPass() {
2461     return new LoopAccessLegacyAnalysis();
2462  }
2463 
2464 } // end namespace llvm
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a non-constant (unknown) dependence-distance Dist between two memory accesses, that have the same stride whose absolute value is given in Stride, and that have the same type size TypeByteSize, in a loop whose takenCount is BackedgeTakenCount, check if it is possible to prove statically that the dependence distance is larger than the range that the accesses will travel through the execution of the loop.
#define LAA_NAME
APInt abs() const
Get the absolute value;.
Definition: APInt.h:1808
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
static unsigned RuntimeMemoryCheckThreshold
performing memory disambiguation checks at runtime do not make more than this number of comparisons...
static bool Check(DecodeStatus &Out, DecodeStatus In)
static const char laa_name[]
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
uint64_t CallInst * C
#define DEBUG_TYPE
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:641
void printChecks(raw_ostream &OS, const SmallVectorImpl< PointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn&#39;t overflow by adding SCEV predicate.
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:399
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:209
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1571
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
bool isBackward() const
Lexically backward dependence.
const SCEV * getConstant(ConstantInt *V)
static bool isInBoundsGep(Value *Ptr)
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
MDNode * TBAA
The tag for type-based alias analysis.
Definition: Metadata.h:657
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2118
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
static constexpr LocationSize unknown()
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
void push_back(const T &Elt)
Definition: SmallVector.h:211
This provides a very simple, boring adaptor for a begin and end iterator into a range type...
The main scalar evolution driver.
This class represents a function call, abstracting a target machine&#39;s calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
raw_ostream & indent(unsigned NumSpaces)
indent - Insert &#39;NumSpaces&#39; spaces.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
Offsets
Offsets in bytes from the start of the input buffer.
Definition: SIInstrInfo.h:1127
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:733
void reset()
Reset the state of the pointer runtime information.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1165
Pass * createLAAPass()
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
A debug info location.
Definition: DebugLoc.h:33
The adaptor from a function pass to a loop pass computes these analyses and makes them available to t...
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:606
block Block Frequency true
An instruction for reading from memory.
Definition: Instructions.h:167
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Hexagon Common GEP
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
static unsigned VectorizationFactor
VF as overridden by the user.
void reserve(size_type N)
Definition: SmallVector.h:369
uint64_t High
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1517
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE)
Insert a pointer and calculate the start and end SCEVs.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
AnalysisUsage & addRequired()
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:140
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
Definition: BitVector.h:937
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
Definition: LoopInfo.h:233
Type * getPointerElementType() const
Definition: Type.h:376
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
static const unsigned MaxVectorWidth
Maximum SIMD width.
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
Diagnostic information for optimization analysis remarks.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:779
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
This file implements a class to represent arbitrary precision integral constant values and operations...
static const char * DepName[]
String version of the types.
const APInt & getAPInt() const
BlockT * getHeader() const
Definition: LoopInfo.h:105
ConstantInt * getValue() const
Key
PAL metadata keys.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1964
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1583
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
member_iterator member_begin(iterator I) const
This node represents a polynomial recurrence on the trip count of the specified loop.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:938
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
This header provides classes for managing per-loop analyses.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:591
An instruction for storing to memory.
Definition: Instructions.h:320
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
static Instruction * getFirstInst(Instruction *FirstInst, Value *V, Instruction *Loc)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1294
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
std::pair< Instruction *, Instruction * > addRuntimeChecks(Instruction *Loc) const
Add code that checks at runtime if the accessed arrays overlap.
bool isForward() const
Lexically forward dependence.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:875
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:363
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
void GetUnderlyingObjects(const Value *V, SmallVectorImpl< const Value *> &Objects, const DataLayout &DL, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to GetUnderlyingObject except that it can look through phi and select instruct...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
bool needsChecking(const CheckingPtrGroup &M, const CheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition: APInt.h:390
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:327
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
This analysis provides dependence information for the memory accesses of a loop.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
bool addPointer(unsigned Index)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:370
const SCEV * getAddExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
member_iterator member_end() const
Represent the analysis usage information of a pass.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
unsigned getAddressSpace() const
Definition: Globals.cpp:111
Value * expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I)
Insert code to directly compute the specified SCEV expression into the program.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:381
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn&#39;t already there.
Definition: SmallSet.h:180
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:607
std::pair< const CheckingPtrGroup *, const CheckingPtrGroup * > PointerCheck
A memcheck which made up of a pair of grouped pointers.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
size_t size() const
Definition: SmallVector.h:52
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
Value * stripIntegerCast(Value *V)
const SCEV * getMulExpr(SmallVectorImpl< const SCEV *> &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
Value * GetUnderlyingObject(Value *V, const DataLayout &DL, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value...
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
bool isNegative() const
Definition: Constants.h:187
DepType
The type of the dependence.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const Value * Ptr
The address of the start of the location.
Representation for a specific memory location.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
static const X86InstrFMA3Group Groups[]
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:115
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we&#39;ve proved that V doesn&#39;t wrap by means of a SCEV predicate.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
Type * getType() const
Return the LLVM type of this SCEV expression.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass...
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
bool isFunctionVectorizable(StringRef F, unsigned VF) const
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
Provides information about what library functions are available for the current target.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction *> &Instrs) const
Print the dependence.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:47
Drive the analysis of memory accesses in the loop.
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:609
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1580
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
const LoopAccessInfo & getInfo(Loop *L)
Query the result of the loop access information for the loop L.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
Class for arbitrary precision integers.
Definition: APInt.h:69
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class uses information about analyze scalars to rewrite expressions in canonical form...
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:470
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
This analysis provides dependence information for the memory accesses of a loop.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
static PointerBounds expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, const RuntimePointerChecking &PtrRtChecking)
Expand code for the lower and upper bound of the pointer group CG in TheLoop.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:331
Dependece between memory access instructions.
This class represents an analyzed expression in the program.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
This file provides utility analysis objects describing memory locations.
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1228
iterator end()
Definition: DenseMap.h:82
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
uint32_t Size
Definition: Profile.cpp:46
static unsigned getAddressSpaceOperand(Value *I)
Take the address space operand from the Load/Store instruction.
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1268
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1247
bool empty() const
Definition: LoopInfo.h:151
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:830
iterator_range< df_iterator< T > > depth_first(const T &G)
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
typename std::set< ECValue >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
Definition: LoopInfo.cpp:532
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS)
void stable_sort(R &&Range)
Definition: STLExtras.h:1289
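A sketch of the range form with illustrative data; stability means elements that compare equal keep their original relative order:
  llvm::SmallVector<std::pair<int, unsigned>, 8> Keys = {{2, 0}, {1, 1}, {2, 2}};
  llvm::stable_sort(Keys, [](const auto &A, const auto &B) { return A.first < B.first; });
  // {1, 1} moves to the front; the two entries with first == 2 keep their order.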
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
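A sketch, assuming ScalarEvolution &SE and a previously computed stride expression StrideExpr (illustrative name), e.g. to compare a backward-moving access against its forward form:
  const SCEV *NegStride = SE.getNegativeSCEV(StrideExpr); // SCEV for -StrideExpr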
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
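A sketch, assuming a PredicatedScalarEvolution PSE and a pointer operand Ptr taken from a load or store; if the pointer is an add-recurrence, its step is the per-iteration stride:
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) {
    const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); // stride per iteration
    (void)Step;
  }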
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:445
A vector that has set insertion semantics.
Definition: SetVector.h:40
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
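A sketch, assuming ScalarEvolution &SE and an arbitrary Value *V; only integer and pointer types are analyzable, so the guard avoids an assertion inside getSCEV:
  if (SE.isSCEVable(V->getType())) {
    const SCEV *S = SE.getSCEV(V);
    (void)S;
  }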
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:1208
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:48
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
This header defines various interfaces for pass management in LLVM.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
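A sketch, assuming a LoopAccessInfo LAI has already been computed for the loop and Ptr is a pointer of interest:
  for (Instruction *I :
       LAI.getDepChecker().getInstructionsForAccess(Ptr, /*isWrite=*/true))
    LLVM_DEBUG(dbgs() << "write through Ptr: " << *I << "\n");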
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1217
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:161
bool sortPtrAccesses(ArrayRef< Value *> VL, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices, if reordering is required.
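A sketch, assuming VL holds the pointer operands of a candidate bundle and DL/SE are in scope; on success, SortedIndices describes the permutation that puts VL into memory order:
  llvm::SmallVector<unsigned, 4> SortedIndices;
  if (llvm::sortPtrAccesses(VL, DL, SE, SortedIndices)) {
    // The accesses are consecutive up to the reported reordering.
  }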
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:49
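A sketch, assuming L is the Loop under analysis; many legality checks only handle the single-exiting-block case:
  if (BasicBlock *Exiting = L->getExitingBlock())
    LLVM_DEBUG(dbgs() << "single exiting block: " << Exiting->getName() << "\n");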
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:70
LocationClass< Ty > location(Ty &L)
Definition: CommandLine.h:448
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:277
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
void print(raw_ostream &OS, const Module *M=nullptr) const override
Print the result of the analysis when invoked with -analyze.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object...
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Loop *L)
Check whether a pointer address cannot wrap.
const BasicBlock * getParent() const
Definition: Instruction.h:66
This class represents a constant integer value.
bool Need
This flag indicates if we need to add the runtime check.
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI)
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
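A self-contained sketch of the 0-or-1 membership test:
  #include "llvm/ADT/SmallSet.h"
  void countExample() {
    llvm::SmallSet<unsigned, 8> Seen;
    Seen.insert(42);
    bool Known = Seen.count(42) == 1; // count() returns 0 or 1
    (void)Known;
  }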
void resize(size_type N)
Definition: SmallVector.h:344