//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The implementation for the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

14 
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Analysis/LoopInfo.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DataLayout.h"
42 #include "llvm/IR/DebugLoc.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/InstrTypes.h"
49 #include "llvm/IR/Instruction.h"
50 #include "llvm/IR/Instructions.h"
51 #include "llvm/IR/Operator.h"
52 #include "llvm/IR/PassManager.h"
53 #include "llvm/IR/Type.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
59 #include "llvm/Support/Debug.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstdlib>
66 #include <iterator>
67 #include <utility>
68 #include <vector>
69 
using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// \brief The maximum iterations used to merge memory checks
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// \brief We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i += 4)
///        A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// \brief Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
                                    const Loop *TheLoop, const char *PassName,
                                    OptimizationRemarkEmitter &ORE) {
  DebugLoc DL = TheLoop->getStartLoc();
  const Value *V = TheLoop->getHeader();
  if (const Instruction *I = Message.getInstr()) {
    // If there is no debug location attached to the instruction, revert back
    // to using the loop's.
    if (I->getDebugLoc())
      DL = I->getDebugLoc();
    V = I->getParent();
  }
  ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
}

static Value *stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI != PtrToStride.end()) {
    Value *StrideVal = SI->second;

    // Strip casts.
    StrideVal = stripIntegerCast(StrideVal);

    // Replace symbolic stride by one.
    Value *One = ConstantInt::get(StrideVal->getType(), 1);
    ValueToValueMap RewriteMap;
    RewriteMap[StrideVal] = One;

    ScalarEvolution *SE = PSE.getSE();
    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
    const auto *CT =
        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

    PSE.addPredicate(*SE->getEqualPredicate(U, CT));
    auto *Expr = PSE.getSCEV(Ptr);

    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr
                 << "\n");
    return Expr;
  }

  // Otherwise, just return the SCEV of the original pointer.
  return OrigSCEV;
}

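// Example (illustrative sketch): for a loop that accesses "A[i * Stride]"
// where "Stride" has an entry in PtrToStride, the pointer's SCEV is roughly
// {%A,+,(4 * %Stride)}. Rewriting under the predicate "%Stride == 1" yields
// the unit-stride AddRec {%A,+,4}, which the rest of the analysis can
// reason about; the predicate recorded in PSE becomes a runtime guard when
// the loop is versioned.
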
/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on N-th loop
/// iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of a single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp))
    ScStart = ScEnd = Sc;
  else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
    // Add the size of the pointed element to ScEnd.
    unsigned EltSize =
        Ptr->getType()->getPointerElementType()->getScalarSizeInBits() / 8;
    const SCEV *EltSizeSCEV = SE->getConstant(ScEnd->getType(), EltSize);
    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
  }

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}

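// Worked example (illustrative): for "for (i = 0; i < N; ++i) A[i] += 1;"
// with 4-byte elements, Sc is the AddRec {%A,+,4}. With backedge-taken
// count BTC, ScStart = %A and ScEnd = %A + 4 * BTC; after adding the
// element size the checked interval is [%A, %A + 4 * BTC + 4), i.e. one
// past the last byte accessed.
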
SmallVector<RuntimePointerChecking::PointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<PointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M,
                                           const CheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

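// Example (illustrative): with I = %base and J = %base + 16, Diff is the
// constant 16, which is non-negative, so I is the minimum. If I and J are
// based on different underlying objects, Diff does not fold to a
// SCEVConstant and the helper returns nullptr ("don't know").
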
bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
  const SCEV *Start = RtCheck.Pointers[Index].Start;
  const SCEV *End = RtCheck.Pointers[Index].End;

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

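// Example (illustrative): if the group currently spans [%A, %A + 32) and
// the pointer at Index spans [%A + 16, %A + 64), both distances to the
// group bounds are constants, so the pointer is merged and the group grows
// to [%A, %A + 64). A pointer whose distance to Low or High is symbolic
// (nullptr from getMinFromExprs) is rejected and stays in its own group.
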
void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(CheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<CheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      unsigned Pointer = PositionMap[MI->getPointer()];
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (CheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable. If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(CheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// \brief Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;

  AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false),
        PSE(PSE) {}

  /// \brief Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// \brief Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// \brief Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// \brief Goes over all memory accesses, checks whether a RT check is
  /// needed and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// \brief Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis
  /// without dependency checking (i.e. ShouldRetryWithRuntimeCheck).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// \brief Go over all memory accesses and check whether runtime pointer
  /// checks are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  const DataLayout &DL;

  /// Set of accesses that need a further dependence check.
  MemAccessInfoSet CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object
  /// and intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really
  /// need a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// \brief Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that, this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
  /// while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// \brief Check whether a pointer can participate in a runtime bounds check.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (!AR)
    return false;

  return AR->isAffine();
}

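// Example (illustrative): "&A[i]" is the affine AddRec {%A,+,4} and has
// computable bounds, while an indirect access such as "&A[B[i]]" is neither
// loop-invariant nor an AddRec, so no runtime bounds check can be emitted
// for it.
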
/// \brief Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
  return Stride == 1;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool NeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 1;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    for (auto A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
      MemAccessInfo Access(Ptr, IsWrite);

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;

      if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&
          // When we run after a failing dependency check we have to make sure
          // we don't have wrapping pointers.
          (!ShouldCheckWrap || isNoWrap(PSE, StridesMap, Ptr, TheLoop))) {
        // The id of the dependence set.
        unsigned DepId;

        if (IsDepCheckNeeded) {
          Value *Leader = DepCands.getLeaderValue(Access).getPointer();
          unsigned &LeaderId = DepSetId[Leader];
          if (!LeaderId)
            LeaderId = RunningDepId++;
          DepId = LeaderId;
        } else
          // Each access has its own dependence set.
          DepId = RunningDepId++;

        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);

        DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
      } else {
        DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
        CanDoRT = false;
      }
    }

    // If we have at least two writes or one write and a read then we need to
    // check them. But there is no need for checks if there is only one
    // dependence set for this alias set.
    //
    // Note that this function computes CanDoRT and NeedRTCheck independently.
    // For example CanDoRT=false, NeedRTCheck=false means that we have a
    // pointer for which we couldn't find the bounds but we don't actually
    // need to emit any checks so it does not matter.
    if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
      NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
                                                 NumWritePtrChecks >= 1));

    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
                        " different address spaces\n");
        return false;
      }
    }
  }

  if (NeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
               << " pointer comparisons.\n");

  RtCheck.Need = NeedRTCheck;

  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  DEBUG(dbgs() << "  AST: "; AST.dump());
  DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " ("
             << (A.getInt() ? "write"
                            : (ReadOnlyPtr.count(A.getPointer()) ? "read-only"
                                                                 : "read"))
             << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts.
          // If this is a read only check other writes for conflicts (but only
          // if there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.insert(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<Value *, 16> ValueVector;
          ValueVector TempObjects;

          GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
          DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
          for (Value *UnderlyingObj : TempObjects) {
            // nullptr never aliases; don't join sets for pointers that have
            // "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           PredicatedScalarEvolution &PSE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (Value *Index : make_range(GEP->idx_begin(), GEP->idx_end()))
    if (!isa<ConstantInt>(Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = PSE.getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

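// Example (illustrative sketch): for "gep inbounds i32, i32* %A, i64 %mul"
// where "%mul = mul nsw i64 %i, 2" and %i is an AddRec with the NSW flag on
// this loop, the single non-constant index is an NSW multiply by a constant
// whose first operand is an NSW AddRec, so the pointer is considered
// non-wrapping even though SCEV did not propagate a flag to the pointer's
// own AddRec.
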
/// \brief Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
                           const Loop *Lp, const ValueToValueMap &StridesMap,
                           bool Assume, bool ShouldCheckWrap) {
  Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  auto *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type" << *Ptr
                 << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR) {
    DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
                 << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
          *Ptr << " SCEV: " << *AR << "\n");
    return 0;
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = !ShouldCheckWrap ||
    PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
    isNoWrapAddRec(Ptr, AR, PSE, Lp);
  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
    if (Assume) {
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
      IsNoWrapAddRec = true;
      DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
                   << "LAA:   Pointer: " << *Ptr << "\n"
                   << "LAA:   SCEV: " << *AR << "\n"
                   << "LAA:   Added an overflow assumption\n");
    } else {
      DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                   << *Ptr << " SCEV: " << *AR << "\n");
      return 0;
    }
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
          " SCEV: " << *AR << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getAPInt();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
      Stride != 1 && Stride != -1) {
    if (Assume) {
      // We can avoid this case by adding a run-time check.
      DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
                   << "inbounds or in address space 0 may wrap:\n"
                   << "LAA:   Pointer: " << *Ptr << "\n"
                   << "LAA:   SCEV: " << *AR << "\n"
                   << "LAA:   Added an overflow assumption\n");
      PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    } else
      return 0;
  }

  return Stride;
}

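// Worked example (illustrative): for "A[2 * i]" over i32, the AddRec step
// is 8 bytes and the element size is 4, so StepVal / Size gives a stride of
// 2 with zero remainder. For "A[B[i]]" the step is not a SCEVConstant and
// getPtrStride returns 0, meaning "no usable constant stride".
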
/// Take the pointer operand from the Load/Store instruction.
/// Returns NULL if this is not a valid Load/Store instruction.
static Value *getPointerOperand(Value *I) {
  if (auto *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperand();
  if (auto *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand();
  return nullptr;
}

/// Take the address space operand from the Load/Store instruction.
/// Returns -1 if this is not a valid Load/Store instruction.
static unsigned getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I))
    return S->getPointerAddressSpace();
  return -1;
}

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB))
    return false;

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return false;

  // Make sure that A and B have the same type if required.
  if (CheckType && PtrA->getType() != PtrB->getType())
    return false;

  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));

  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  //  OffsetDelta = OffsetB - OffsetA;
  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
  if (PtrA == PtrB)
    return OffsetDelta == Size;

  // Compute the necessary base pointer delta to have the necessary final delta
  // equal to the size.
  // BaseDelta = Size - OffsetDelta;
  const SCEV *SizeSCEV = SE.getConstant(Size);
  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);

  // Otherwise compute the distance with SCEV between the base pointers.
  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
  return X == PtrSCEVB;
}

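// Example (illustrative): two i32 loads from "&A[0]" and "&A[1]" both strip
// to the base %A with byte offsets 0 and 4; OffsetDelta (4) equals the
// store size of i32, so the accesses are consecutive. For "&A[0]" and
// "&A[2]" the delta is 8 != 4 and the query fails.
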
bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return true;

  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown;
}

bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iterations between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (VF >>= 1);
      break;
    }
  }

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    DEBUG(dbgs() << "LAA: Distance " << Distance
                 << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

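// Worked example (illustrative): with TypeByteSize = 4 and Distance = 12,
// the scan starts at VF = 8 bytes: 12 % 8 != 0 and 12 / 8 = 1 < 32
// iterations, so the cap drops to 4 bytes. That is below 2 * TypeByteSize,
// so the distance is reported as a store-to-load forwarding conflict.
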
/// \brief Check the dependence for two accesses with the same stride \p
/// Stride. \p Distance is the positive distance and \p TypeByteSize is type
/// size in bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not a multiple of the type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
  int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    std::swap(APtr, BPtr);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);

  DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
               << "(Induction step: " << StrideAPtr << ")\n");
  DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
               << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap
  // in the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    ShouldRetryWithRuntimeCheck = true;
    return Dependence::Unknown;
  }

  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();
  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);

  const APInt &Val = C->getAPInt();
  int64_t Distance = Val.getSExtValue();
  uint64_t Stride = std::abs(StrideAPtr);

  // Attempt to prove strided accesses independent.
  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
      areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
    DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
    return Dependence::NoDep;
  }

  // Negative distances are not plausible dependencies.
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    if (IsTrueDataDependence && EnableForwardingConflictDetection &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
         ATy != BTy)) {
      DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
      return Dependence::ForwardButPreventsForwarding;
    }

    DEBUG(dbgs() << "LAA: Dependence is negative\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  // Could be improved to assert type sizes are the same (i32 == float, etc).
  if (Val == 0) {
    if (ATy == BTy)
      return Dependence::Forward;
    DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (ATy != BTy) {
    DEBUG(dbgs() <<
          "LAA: ReadWrite-Write positive dependency with different types\n");
    return Dependence::Unknown;
  }

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (no need to add the last gap distance).
  //
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      foo(int *A) {
  //        int *B = (int *)((char *)A + 14);
  //        for (i = 0 ; i < 1024 ; i += 2)
  //          B[i] = A[i] + 1;
  //      }
  //
  // Two accesses in memory (stride is 2):
  //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
  //                              | B[0] |      | B[2] |      | B[4] |
  //
  // Distance needed for vectorizing iterations except the last iteration:
  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
  // 12, which is less than the distance.
  //
  // If MinNumIter is 4 (say a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than the distance.
  // It is not safe to do vectorization.
  uint64_t MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
    DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
                 << '\n');
    return Dependence::Backward;
  }

  // Unsafe if the minimum distance needed is greater than max safe distance.
  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
    DEBUG(dbgs() << "LAA: Failure because it needs at least "
                 << MinDistanceNeeded << " size in bytes");
    return Dependence::Backward;
  }

  // Positive distance bigger than max vectorization factor.
  // FIXME: Should use max factor instead of max distance in bytes, which could
  // not handle different types.
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      void foo (int *A, char *B) {
  //        for (unsigned i = 0; i < 1024; i++) {
  //          A[i+2] = A[i] + 1;
  //          B[i+2] = B[i] + 1;
  //        }
  //      }
  //
  // This case is currently unsafe according to the max safe distance. If we
  // analyze the two accesses on array B, the max safe dependence distance
  // is 2. Then we analyze the accesses on array A, the minimum distance
  // needed is 8, which is greater than 2, so vectorization is forbidden. But
  // actually both A and B could be vectorized with a factor of 2 iterations.
  MaxSafeDepDistBytes =
      std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence && EnableForwardingConflictDetection &&
      couldPreventStoreLoadForward(Distance, TypeByteSize))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
               << " with max VF = "
               << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');

  return Dependence::BackwardVectorizable;
}

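// Worked example (illustrative): for "A[i+2] = A[i] + 1" over i32, the
// constant distance is 8 bytes, Stride is 1 and TypeByteSize is 4. With the
// default MinNumIter of 2, MinDistanceNeeded = 4 * 1 * (2 - 1) + 4 = 8,
// which does not exceed the distance, so the result is
// BackwardVectorizable and MaxSafeDepDistBytes is capped at 8 (max VF = 2).
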
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                   MemAccessInfoSet &CheckDeps,
                                   const ValueToValueMap &Strides) {

  MaxSafeDepDistBytes = -1;
  while (!CheckDeps.empty()) {
    MemAccessInfo CurAccess = *CheckDeps.begin();

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
        AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI =
        AccessSets.member_begin(I);
    EquivalenceClasses<MemAccessInfo>::member_iterator AE =
        AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      CheckDeps.erase(*AI);
      EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
          for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
               I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
            auto A = std::make_pair(&*AI, *I1);
            auto B = std::make_pair(&*OI, *I2);

            assert(*I1 != *I2);
            if (*I1 > *I2)
              std::swap(A, B);

            Dependence::DepType Type =
                isDependent(*A.first, A.second, *B.first, B.second, Strides);
            SafeForVectorization &= Dependence::isSafeForVectorization(Type);

            // Gather dependences unless we accumulated MaxDependences
            // dependences. In that case return as soon as we find the first
            // unsafe dependence. This puts a limit on this quadratic
            // algorithm.
            if (RecordDependences) {
              if (Type != Dependence::NoDep)
                Dependences.push_back(Dependence(A.second, B.second, Type));

              if (Dependences.size() >= MaxDependences) {
                RecordDependences = false;
                Dependences.clear();
                DEBUG(dbgs() << "Too many dependences, stopped recording\n");
              }
            }
            if (!RecordDependences && !SafeForVectorization)
              return false;
          }
        ++OI;
      }
      AI++;
    }
  }

  DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
  return SafeForVectorization;
}

SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
  MemAccessInfo Access(Ptr, isWrite);
  auto &IndexVector = Accesses.find(Access)->second;

  SmallVector<Instruction *, 4> Insts;
  transform(IndexVector,
            std::back_inserter(Insts),
            [&](unsigned Idx) { return this->InstMap[Idx]; });
  return Insts;
}

const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};

void MemoryDepChecker::Dependence::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<Instruction *> &Instrs) const {
  OS.indent(Depth) << DepName[Type] << ":\n";
  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}

bool LoopAccessInfo::canAnalyzeLoop() {
  // We need to have a loop header.
  DEBUG(dbgs() << "LAA: Found a loop in "
               << TheLoop->getHeader()->getParent()->getName() << ": "
               << TheLoop->getHeader()->getName() << '\n');

  // We can only analyze innermost loops.
  if (!TheLoop->empty()) {
    DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
    recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
    return false;
  }

  // We must have a single backedge.
  if (TheLoop->getNumBackEdges() != 1) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    recordAnalysis("CFGNotUnderstood")
        << "loop control flow is not understood by analyzer";
    return false;
  }

  // We must have a single exiting block.
  if (!TheLoop->getExitingBlock()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    recordAnalysis("CFGNotUnderstood")
        << "loop control flow is not understood by analyzer";
    return false;
  }

  // We only handle bottom-tested loops, i.e. loops in which the condition is
  // checked at the end of each iteration. With that we can assume that all
  // instructions in the loop are executed the same number of times.
  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    recordAnalysis("CFGNotUnderstood")
        << "loop control flow is not understood by analyzer";
    return false;
  }

  // ScalarEvolution needs to be able to find the exit count.
  const SCEV *ExitCount = PSE->getBackedgeTakenCount();
  if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
    recordAnalysis("CantComputeNumberOfIterations")
        << "could not determine number of loop iterations";
    DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
    return false;
  }

  return true;
}

1570 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
1571  const TargetLibraryInfo *TLI,
1572  DominatorTree *DT) {
1573  typedef SmallPtrSet<Value*, 16> ValueSet;
1574 
1575  // Holds the Load and Store instructions.
1576  SmallVector<LoadInst *, 16> Loads;
1577  SmallVector<StoreInst *, 16> Stores;
1578 
1579  // Holds all the different accesses in the loop.
1580  unsigned NumReads = 0;
1581  unsigned NumReadWrites = 0;
1582 
1583  PtrRtChecking->Pointers.clear();
1584  PtrRtChecking->Need = false;
1585 
1586  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
1587 
1588  // For each block.
1589  for (BasicBlock *BB : TheLoop->blocks()) {
1590  // Scan the BB and collect legal loads and stores.
1591  for (Instruction &I : *BB) {
1592  // If this is a load, save it. If this instruction can read from memory
1593  // but is not a load, then we quit. Notice that we don't handle function
1594  // calls that read or write.
1595  if (I.mayReadFromMemory()) {
1596  // Many math library functions read the rounding mode. We will only
1597  // vectorize a loop if it contains known function calls that don't set
1598  // the flag. Therefore, it is safe to ignore this read from memory.
1599  auto *Call = dyn_cast<CallInst>(&I);
1600  if (Call && getVectorIntrinsicIDForCall(Call, TLI))
1601  continue;
1602 
1603  // If the function has an explicit vectorized counterpart, we can safely
1604  // assume that it can be vectorized.
1605  if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1606  TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
1607  continue;
1608 
1609  auto *Ld = dyn_cast<LoadInst>(&I);
1610  if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
1611  recordAnalysis("NonSimpleLoad", Ld)
1612  << "read with atomic ordering or volatile read";
1613  DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
1614  CanVecMem = false;
1615  return;
1616  }
1617  NumLoads++;
1618  Loads.push_back(Ld);
1619  DepChecker->addAccess(Ld);
1620  if (EnableMemAccessVersioning)
1621  collectStridedAccess(Ld);
1622  continue;
1623  }
1624 
1625  // Save 'store' instructions. Abort if other instructions write to memory.
1626  if (I.mayWriteToMemory()) {
1627  auto *St = dyn_cast<StoreInst>(&I);
1628  if (!St) {
1629  recordAnalysis("CantVectorizeInstruction", St)
1630  << "instruction cannot be vectorized";
1631  CanVecMem = false;
1632  return;
1633  }
1634  if (!St->isSimple() && !IsAnnotatedParallel) {
1635  recordAnalysis("NonSimpleStore", St)
1636  << "write with atomic ordering or volatile write";
1637  DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
1638  CanVecMem = false;
1639  return;
1640  }
1641  NumStores++;
1642  Stores.push_back(St);
1643  DepChecker->addAccess(St);
1644  if (EnableMemAccessVersioning)
1645  collectStridedAccess(St);
1646  }
1647  } // Next instr.
1648  } // Next block.
1649 
1650  // Now we have two lists that hold the loads and the stores.
1651  // Next, we find the pointers that they use.
1652 
1653  // Check if we see any stores. If there are no stores, then we don't
1654  // care if the pointers are *restrict*.
1655  if (!Stores.size()) {
1656  DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
1657  CanVecMem = true;
1658  return;
1659  }
1660 
1661  MemoryDepChecker::DepCandidates DependentAccesses;
1662  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
1663  AA, LI, DependentAccesses, *PSE);
1664 
1665  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
1666  // multiple times on the same object. If the ptr is accessed twice, once
1667  // for read and once for write, it will only appear once (on the write
1668  // list). This is okay, since we are going to check for conflicts between
1669  // writes and between reads and writes, but not between reads and reads.
1670  ValueSet Seen;
1671 
1672  for (StoreInst *ST : Stores) {
1673  Value *Ptr = ST->getPointerOperand();
1674  // Check for store to loop invariant address.
1675  StoreToLoopInvariantAddress |= isUniform(Ptr);
1676  // If we did *not* see this pointer before, insert it to the read-write
1677  // list. At this phase it is only a 'write' list.
1678  if (Seen.insert(Ptr).second) {
1679  ++NumReadWrites;
1680 
1681  MemoryLocation Loc = MemoryLocation::get(ST);
1682  // The TBAA metadata could have a control dependency on the predication
1683  // condition, so we cannot rely on it when determining whether or not we
1684  // need runtime pointer checks.
1685  if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
1686  Loc.AATags.TBAA = nullptr;
1687 
1688  Accesses.addStore(Loc);
1689  }
1690  }
1691 
1692  if (IsAnnotatedParallel) {
1693  DEBUG(dbgs()
1694  << "LAA: The loop is annotated parallel, ignoring memory dependency "
1695  << "checks.\n");
1696  CanVecMem = true;
1697  return;
1698  }
1699 
1700  for (LoadInst *LD : Loads) {
1701  Value *Ptr = LD->getPointerOperand();
1702  // If we did *not* see this pointer before, insert it to the
1703  // read list. If we *did* see it before, then it is already in
1704  // the read-write list. This allows us to vectorize expressions
1705  // such as A[i] += x, because the address of A[i] is a read-write
1706  // pointer. This only works if the index of A[i] is consecutive.
1707  // If the address of i is unknown (for example A[B[i]]) then we may
1708  // read a few words, modify, and write a few words, and some of the
1709  // words may be written to the same address.
1710  bool IsReadOnlyPtr = false;
1711  if (Seen.insert(Ptr).second ||
1712  !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
1713  ++NumReads;
1714  IsReadOnlyPtr = true;
1715  }
1716 
1717  MemoryLocation Loc = MemoryLocation::get(LD);
1718  // The TBAA metadata could have a control dependency on the predication
1719  // condition, so we cannot rely on it when determining whether or not we
1720  // need runtime pointer checks.
1721  if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
1722  Loc.AATags.TBAA = nullptr;
1723 
1724  Accesses.addLoad(Loc, IsReadOnlyPtr);
1725  }
1726 
1727  // If we write (or read-write) to a single destination and there are no
1728  // other reads in this loop, then it is safe to vectorize.
1729  if (NumReadWrites == 1 && NumReads == 0) {
1730  DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
1731  CanVecMem = true;
1732  return;
1733  }
1734 
1735  // Build dependence sets and check whether we need a runtime pointer bounds
1736  // check.
1737  Accesses.buildDependenceSets();
1738 
1739  // Find pointers with computable bounds. We are going to use this information
1740  // to place a runtime bound check.
1741  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
1742  TheLoop, SymbolicStrides);
1743  if (!CanDoRTIfNeeded) {
1744  recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
1745  DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
1746  << "the array bounds.\n");
1747  CanVecMem = false;
1748  return;
1749  }
1750 
1751  DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");
1752 
1753  CanVecMem = true;
1754  if (Accesses.isDependencyCheckNeeded()) {
1755  DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
1756  CanVecMem = DepChecker->areDepsSafe(
1757  DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
1758  MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
1759 
1760  if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
1761  DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
1762 
1763  // Clear the dependency checks. We assume they are not needed.
1764  Accesses.resetDepChecks(*DepChecker);
1765 
1766  PtrRtChecking->reset();
1767  PtrRtChecking->Need = true;
1768 
1769  auto *SE = PSE->getSE();
1770  CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
1771  SymbolicStrides, true);
1772 
1773  // Check that we found the bounds for the pointer.
1774  if (!CanDoRTIfNeeded) {
1775  recordAnalysis("CantCheckMemDepsAtRunTime")
1776  << "cannot check memory dependencies at runtime";
1777  DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
1778  CanVecMem = false;
1779  return;
1780  }
1781 
1782  CanVecMem = true;
1783  }
1784  }
1785 
1786  if (CanVecMem)
1787  DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
1788  << (PtrRtChecking->Need ? "" : " don't")
1789  << " need runtime memory checks.\n");
1790  else {
1791  recordAnalysis("UnsafeMemDep")
1792  << "unsafe dependent memory operations in loop. Use "
1793  "#pragma loop distribute(enable) to allow loop distribution "
1794  "to attempt to isolate the offending operations into a separate "
1795  "loop";
1796  DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
1797  }
1798 }
1799 
1800 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
1801  DominatorTree *DT) {
1802  assert(TheLoop->contains(BB) && "Unknown block used");
1803 
1804  // Blocks that do not dominate the latch need predication.
1805  BasicBlock* Latch = TheLoop->getLoopLatch();
1806  return !DT->dominates(BB, Latch);
1807 }
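// For intuition (the loop below is an invented example): in
//
//   for (...) {
//     if (cond[i])
//       a[i] = 0;   // this block does not dominate the latch
//   }
//
// the conditional block runs on only some iterations, so it needs
// predication; this is also why analyzeLoop() above drops TBAA metadata
// for accesses in such blocks.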
1808 
1809 OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
1810  Instruction *I) {
1811  assert(!Report && "Multiple reports generated");
1812 
1813  Value *CodeRegion = TheLoop->getHeader();
1814  DebugLoc DL = TheLoop->getStartLoc();
1815 
1816  if (I) {
1817  CodeRegion = I->getParent();
1818  // If there is no debug location attached to the instruction, fall back
1819  // to using the loop's.
1820  if (I->getDebugLoc())
1821  DL = I->getDebugLoc();
1822  }
1823 
1824  Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
1825  CodeRegion);
1826  return *Report;
1827 }
1828 
1829 bool LoopAccessInfo::isUniform(Value *V) const {
1830  auto *SE = PSE->getSE();
1831  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
1832  // never considered uniform.
1833  // TODO: Is this really what we want? Even without FP SCEV, we may want some
1834  // trivially loop-invariant FP values to be considered uniform.
1835  if (!SE->isSCEVable(V->getType()))
1836  return false;
1837  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
1838 }
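// Example (assumed values): a pointer %base defined outside TheLoop has a
// loop-invariant SCEV, so isUniform(%base) is true; an address advancing
// with the induction variable has an add-recurrence SCEV such as
// {%base,+,4}, which is not loop-invariant, so it is not uniform.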
1839 
1840 // FIXME: this function is currently a duplicate of the one in
1841 // LoopVectorize.cpp.
1842 static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
1843  Instruction *Loc) {
1844  if (FirstInst)
1845  return FirstInst;
1846  if (Instruction *I = dyn_cast<Instruction>(V))
1847  return I->getParent() == Loc->getParent() ? I : nullptr;
1848  return nullptr;
1849 }
1850 
1851 namespace {
1852 
1853 /// \brief IR Values for the lower and upper bounds of a pointer evolution. We
1854 /// need to use value-handles because SCEV expansion can invalidate previously
1855 /// expanded values. Thus expansion of a pointer can invalidate the bounds for
1856 /// a previous one.
1857 struct PointerBounds {
1858  TrackingVH<Value> Start;
1859  TrackingVH<Value> End;
1860 };
1861 
1862 } // end anonymous namespace
1863 
1864 /// \brief Expand code for the lower and upper bound of the pointer group \p CG
1865 /// in \p TheLoop. \return the values for the bounds.
1866 static PointerBounds
1867 expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
1868  Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE,
1869  const RuntimePointerChecking &PtrRtChecking) {
1870  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
1871  const SCEV *Sc = SE->getSCEV(Ptr);
1872 
1873  unsigned AS = Ptr->getType()->getPointerAddressSpace();
1874  LLVMContext &Ctx = Loc->getContext();
1875 
1876  // Use this type for pointer arithmetic.
1877  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
1878 
1879  if (SE->isLoopInvariant(Sc, TheLoop)) {
1880  DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
1881  << "\n");
1882  // Ptr could be in the loop body. If so, expand a new one at the correct
1883  // location.
1884  Instruction *Inst = dyn_cast<Instruction>(Ptr);
1885  Value *NewPtr = (Inst && TheLoop->contains(Inst))
1886  ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
1887  : Ptr;
1888  return {NewPtr, NewPtr};
1889  } else {
1890  Value *Start = nullptr, *End = nullptr;
1891  DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1892  Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
1893  End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
1894  DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n");
1895  return {Start, End};
1896  }
1897 }
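// A worked example with assumed values: if the group's only member is a
// 4-byte access A[i] for i = 0..N-1, the group bounds are roughly
// CG->Low = %A and CG->High = %A + 4*N (one past the last byte accessed,
// matching the End convention documented in addRuntimeChecks() below), and
// both are expanded as i8* values at Loc. For a loop-invariant pointer the
// interval degenerates to the single point {Ptr, Ptr}.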
1898 
1899 /// \brief Turns a collection of checks into a collection of expanded upper and
1900 /// lower bounds for both pointers in the check.
1901 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
1902  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks,
1903  Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp,
1904  const RuntimePointerChecking &PtrRtChecking) {
1905  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
1906 
1907  // Here we're relying on the SCEV Expander's cache to only emit code for the
1908  // same bounds once.
1909  transform(
1910  PointerChecks, std::back_inserter(ChecksWithBounds),
1911  [&](const RuntimePointerChecking::PointerCheck &Check) {
1912  PointerBounds
1913  First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
1914  Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking);
1915  return std::make_pair(First, Second);
1916  });
1917 
1918  return ChecksWithBounds;
1919 }
1920 
1921 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks(
1922  Instruction *Loc,
1923  const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks)
1924  const {
1925  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
1926  auto *SE = PSE->getSE();
1927  SCEVExpander Exp(*SE, DL, "induction");
1928  auto ExpandedChecks =
1929  expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, *PtrRtChecking);
1930 
1931  LLVMContext &Ctx = Loc->getContext();
1932  Instruction *FirstInst = nullptr;
1933  IRBuilder<> ChkBuilder(Loc);
1934  // Our instructions might fold to a constant.
1935  Value *MemoryRuntimeCheck = nullptr;
1936 
1937  for (const auto &Check : ExpandedChecks) {
1938  const PointerBounds &A = Check.first, &B = Check.second;
1939  // Check if two pointers (A and B) conflict where conflict is computed as:
1940  // start(A) < end(B) && start(B) < end(A)
1941  unsigned AS0 = A.Start->getType()->getPointerAddressSpace();
1942  unsigned AS1 = B.Start->getType()->getPointerAddressSpace();
1943 
1944  assert((AS0 == B.End->getType()->getPointerAddressSpace()) &&
1945  (AS1 == A.End->getType()->getPointerAddressSpace()) &&
1946  "Trying to bounds check pointers with different address spaces");
1947 
1948  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
1949  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
1950 
1951  Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc");
1952  Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc");
1953  Value *End0 = ChkBuilder.CreateBitCast(A.End, PtrArithTy1, "bc");
1954  Value *End1 = ChkBuilder.CreateBitCast(B.End, PtrArithTy0, "bc");
1955 
1956  // [A|B].Start points to the first accessed byte under base [A|B].
1957  // [A|B].End points to the last accessed byte, plus one.
1958  // There is no conflict when the intervals are disjoint:
1959  // NoConflict = (B.Start >= A.End) || (A.Start >= B.End)
1960  //
1961  // bound0 = (B.Start < A.End)
1962  // bound1 = (A.Start < B.End)
1963  // IsConflict = bound0 & bound1
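  // Worked example with assumed addresses: if A covers [0x100, 0x140) and
  // B covers [0x120, 0x160), then bound0 = (0x120 < 0x140) and
  // bound1 = (0x100 < 0x160) are both true, so IsConflict is true. If B
  // instead covered [0x140, 0x180), bound0 = (0x140 < 0x140) would be
  // false and the intervals are proven disjoint at run time.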
1964  Value *Cmp0 = ChkBuilder.CreateICmpULT(Start0, End1, "bound0");
1965  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
1966  Value *Cmp1 = ChkBuilder.CreateICmpULT(Start1, End0, "bound1");
1967  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
1968  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1969  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
1970  if (MemoryRuntimeCheck) {
1971  IsConflict =
1972  ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");
1973  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
1974  }
1975  MemoryRuntimeCheck = IsConflict;
1976  }
1977 
1978  if (!MemoryRuntimeCheck)
1979  return std::make_pair(nullptr, nullptr);
1980 
1981  // We have to do this trickery because the IRBuilder might fold the check to a
1982  // constant expression, in which case there is no Instruction anchored
1983  // in the block.
1984  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
1985  ConstantInt::getTrue(Ctx));
1986  ChkBuilder.Insert(Check, "memcheck.conflict");
1987  FirstInst = getFirstInst(FirstInst, Check, Loc);
1988  return std::make_pair(FirstInst, Check);
1989 }
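// For reference, the IR produced above for one pair of groups has roughly
// the following shape (names follow the Twine arguments passed to the
// builder calls; the bitcasts and the or-reduction appear only when
// needed):
//
//   %bound0 = icmp ult i8* %bc.start0, %bc.end1
//   %bound1 = icmp ult i8* %bc.start1, %bc.end0
//   %found.conflict = and i1 %bound0, %bound1
//   %conflict.rdx = or i1 %prev.check, %found.conflict
//   %memcheck.conflict = and i1 %conflict.rdx, true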
1990 
1991 std::pair<Instruction *, Instruction *>
1992 LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const {
1993  if (!PtrRtChecking->Need)
1994  return std::make_pair(nullptr, nullptr);
1995 
1996  return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
1997 }
1998 
1999 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2000  Value *Ptr = nullptr;
2001  if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
2002  Ptr = LI->getPointerOperand();
2003  else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
2004  Ptr = SI->getPointerOperand();
2005  else
2006  return;
2007 
2008  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2009  if (!Stride)
2010  return;
2011 
2012  DEBUG(dbgs() << "LAA: Found a strided access that we can version");
2013  DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
2014  SymbolicStrides[Ptr] = Stride;
2015  StrideSet.insert(Stride);
2016 }
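// The SymbolicStrides map filled in here is what replaceSymbolicStrideSCEV()
// consumes: under the versioned loop's "Stride == 1" guard, a pointer SCEV
// such as {%A,+,(4 * %s)} may be rewritten as if %s were the constant 1,
// i.e. {%A,+,4}. The concrete SCEVs shown are illustrative only.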
2017 
2018 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
2019  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
2020  DominatorTree *DT, LoopInfo *LI)
2021  : PSE(llvm::make_unique<PredicatedScalarEvolution>(*SE, *L)),
2022  PtrRtChecking(llvm::make_unique<RuntimePointerChecking>(SE)),
2023  DepChecker(llvm::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
2024  NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
2025  StoreToLoopInvariantAddress(false) {
2026  if (canAnalyzeLoop())
2027  analyzeLoop(AA, LI, TLI, DT);
2028 }
2029 
2030 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
2031  if (CanVecMem) {
2032  OS.indent(Depth) << "Memory dependences are safe";
2033  if (MaxSafeDepDistBytes != -1ULL)
2034  OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
2035  << " bytes";
2036  if (PtrRtChecking->Need)
2037  OS << " with run-time checks";
2038  OS << "\n";
2039  }
2040 
2041  if (Report)
2042  OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
2043 
2044  if (auto *Dependences = DepChecker->getDependences()) {
2045  OS.indent(Depth) << "Dependences:\n";
2046  for (auto &Dep : *Dependences) {
2047  Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
2048  OS << "\n";
2049  }
2050  } else
2051  OS.indent(Depth) << "Too many dependences, not recorded\n";
2052 
2053  // List the pairs of accesses that need run-time checks to prove independence.
2054  PtrRtChecking->print(OS, Depth);
2055  OS << "\n";
2056 
2057  OS.indent(Depth) << "Store to invariant address was "
2058  << (StoreToLoopInvariantAddress ? "" : "not ")
2059  << "found in loop.\n";
2060 
2061  OS.indent(Depth) << "SCEV assumptions:\n";
2062  PSE->getUnionPredicate().print(OS, Depth);
2063 
2064  OS << "\n";
2065 
2066  OS.indent(Depth) << "Expressions re-written:\n";
2067  PSE->print(OS, Depth);
2068 }
2069 
2070 const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
2071  auto &LAI = LoopAccessInfoMap[L];
2072 
2073  if (!LAI)
2074  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
2075 
2076  return *LAI.get();
2077 }
2078 
2079 void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
2080  LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
2081 
2082  for (Loop *TopLevelLoop : *LI)
2083  for (Loop *L : depth_first(TopLevelLoop)) {
2084  OS.indent(2) << L->getHeader()->getName() << ":\n";
2085  auto &LAI = LAA.getInfo(L);
2086  LAI.print(OS, 4);
2087  }
2088 }
2089 
2090 bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
2091  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2092  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2093  TLI = TLIP ? &TLIP->getTLI() : nullptr;
2094  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2095  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2096  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2097 
2098  return false;
2099 }
2100 
2101 void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
2102  AU.addRequired<ScalarEvolutionWrapperPass>();
2103  AU.addRequired<AAResultsWrapperPass>();
2104  AU.addRequired<DominatorTreeWrapperPass>();
2105  AU.addRequired<LoopInfoWrapperPass>();
2106 
2107  AU.setPreservesAll();
2108 }
2109 
2110 char LoopAccessLegacyAnalysis::ID = 0;
2111 static const char laa_name[] = "Loop Access Analysis";
2112 #define LAA_NAME "loop-accesses"
2113 
2114 INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2115 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
2116 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
2117 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2118 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
2119 INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
2120 
2121 AnalysisKey LoopAccessAnalysis::Key;
2122 
2123 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
2124  LoopStandardAnalysisResults &AR) {
2125  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
2126 }
2127 
2128 namespace llvm {
2129 
2130  Pass *createLAAPass() {
2131  return new LoopAccessLegacyAnalysis();
2132  }
2133 
2134 } // end namespace llvm