//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/VectorUtils.h"

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// \brief The maximum number of iterations used to merge memory checks
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// \brief We collect interesting dependences up to this threshold.
static cl::opt<unsigned> MaxInterestingDependence(
    "max-interesting-dependences", cl::Hidden,
    cl::desc("Maximum number of interesting dependences collected by "
             "loop-access analysis (default = 100)"),
    cl::init(100));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
                                    const Function *TheFunction,
                                    const Loop *TheLoop,
                                    const char *PassName) {
  DebugLoc DL = TheLoop->getStartLoc();
  if (const Instruction *I = Message.getInstr())
    DL = I->getDebugLoc();
  emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
                                 *TheFunction, DL, Message.str());
}

Value *llvm::stripIntegerCast(Value *V) {
  if (CastInst *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}
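
// As an illustration of the rewrite performed below: for a loop such as
//
//   for (i = 0; i < n; i++)
//     A[i * Stride] = B[i];
//
// that has been versioned on "Stride == 1", a pointer SCEV such as
// {%A,+,(4 * %Stride)} is rewritten to {%A,+,4}, so the rest of the
// analysis can treat the access as unit-strided.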

const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {

  const SCEV *OrigSCEV = SE->getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI != PtrToStride.end()) {
    Value *StrideVal = SI->second;

    // Strip casts.
    StrideVal = stripIntegerCast(StrideVal);

    // Replace symbolic stride by one.
    Value *One = ConstantInt::get(StrideVal->getType(), 1);
    ValueToValueMap RewriteMap;
    RewriteMap[StrideVal] = One;

    const SCEV *ByOne =
        SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true);
    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne
                 << "\n");
    return ByOne;
  }

  // Otherwise, just return the SCEV of the original pointer.
  return SE->getSCEV(Ptr);
}

void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
  assert(AR && "Invalid addrec expression");
  const SCEV *Ex = SE->getBackedgeTakenCount(Lp);
  const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE);
  Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId,
                        Sc);
}

bool RuntimePointerChecking::needsChecking(
    const CheckingPtrGroup &M, const CheckingPtrGroup &N,
    const SmallVectorImpl<int> *PtrPartition) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J], PtrPartition))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}
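
// For example, getMinFromExprs(%a, %a + 4, SE) returns %a because the
// difference (%a + 4) - %a folds to the constant 4, which is non-negative;
// when the difference does not fold to a constant, nullptr is returned.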

bool RuntimePointerChecking::CheckingPtrGroup::addPointer(unsigned Index) {
  const SCEV *Start = RtCheck.Pointers[Index].Start;
  const SCEV *End = RtCheck.Pointers[Index].End;

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.
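  //
  // As an example, if pointers %a, %a + 4 and %a + 8 fall into one
  // equivalence class, the first one seeds a group and the other two are
  // merged into it, since their distance to the group's bounds is a known
  // constant; the group's Low/High are widened as needed so that a single
  // pair of comparisons covers all three members.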
  CheckingGroups.clear();

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(CheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<CheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      unsigned Pointer = PositionMap[MI->getPointer()];
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (CheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable. If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(CheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    std::copy(Groups.begin(), Groups.end(), std::back_inserter(CheckingGroups));
  }
}

bool RuntimePointerChecking::needsChecking(
    unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  // If PtrPartition is set omit checks between pointers of the same partition.
  // Partition number -1 means that the pointer is used in multiple partitions.
  // In this case we can't omit the check.
  if (PtrPartition && (*PtrPartition)[I] != -1 &&
      (*PtrPartition)[I] == (*PtrPartition)[J])
    return false;

  return true;
}

void RuntimePointerChecking::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<int> *PtrPartition) const {

  OS.indent(Depth) << "Run-time memory checks:\n";

  unsigned N = 0;
  for (unsigned I = 0; I < CheckingGroups.size(); ++I)
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J)
      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) {
        OS.indent(Depth) << "Check " << N++ << ":\n";
        OS.indent(Depth + 2) << "Comparing group " << I << ":\n";

        for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) {
          OS.indent(Depth + 2)
              << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n";
          if (PtrPartition)
            OS << " (Partition: "
               << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")"
               << "\n";
        }

        OS.indent(Depth + 2) << "Against group " << J << ":\n";

        for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) {
          OS.indent(Depth + 2)
              << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n";
          if (PtrPartition)
            OS << " (Partition: "
               << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")"
               << "\n";
        }
      }

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    OS.indent(Depth + 2) << "Group " << I << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low
                         << " High: " << *CheckingGroups[I].High << ")\n";
    for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: "
                           << *Pointers[CheckingGroups[I].Members[J]].Expr
                           << "\n";
    }
  }
}

unsigned RuntimePointerChecking::getNumberOfChecks(
    const SmallVectorImpl<int> *PtrPartition) const {

  unsigned NumPartitions = CheckingGroups.size();
  unsigned CheckCount = 0;

  for (unsigned I = 0; I < NumPartitions; ++I)
    for (unsigned J = I + 1; J < NumPartitions; ++J)
      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition))
        CheckCount++;
  return CheckCount;
}

bool RuntimePointerChecking::needsAnyChecking(
    const SmallVectorImpl<int> *PtrPartition) const {
  unsigned NumPointers = Pointers.size();

  for (unsigned I = 0; I < NumPointers; ++I)
    for (unsigned J = I + 1; J < NumPointers; ++J)
      if (needsChecking(I, J, PtrPartition))
        return true;
  return false;
}

namespace {
/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// \brief Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;

  AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA)
      : DL(Dl), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false) {}

  /// \brief Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// \brief Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, MemoryLocation::UnknownSize, Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// \brief Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckStride = false);

  /// \brief Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// \brief Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. ShouldRetryWithRuntimeCheck).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used. Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearInterestingDependences();
  }

  MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// \brief Go over all memory accesses and check whether runtime pointer
  /// checks are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  const DataLayout &DL;

  /// Set of accesses that need a further dependence check.
  MemAccessInfoSet CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// \brief Initial processing of memory accesses determined that we may need
  /// to add memchecks. Perform the analysis to determine the necessary checks.
  ///
  /// Note that this is different from isDependencyCheckNeeded. When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared
  /// while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;
};

} // end anonymous namespace

/// \brief Check whether a pointer can participate in a runtime bounds check.
static bool hasComputableBounds(ScalarEvolution *SE,
                                const ValueToValueMap &Strides, Value *Ptr) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr);
  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (!AR)
    return false;

  return AR->isAffine();
}
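
// For example, a pointer with the affine SCEV {%A,+,4}<%loop> has computable
// bounds, while an indirect access like A[B[i]] does not yield an AddRec and
// fails this test.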

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckStride) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool NeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 1;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    for (auto A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
      MemAccessInfo Access(Ptr, IsWrite);

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;

      if (hasComputableBounds(SE, StridesMap, Ptr) &&
          // When we run after a failing dependency check we have to make sure
          // we don't have wrapping pointers.
          (!ShouldCheckStride ||
           isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) {
        // The id of the dependence set.
        unsigned DepId;

        if (IsDepCheckNeeded) {
          Value *Leader = DepCands.getLeaderValue(Access).getPointer();
          unsigned &LeaderId = DepSetId[Leader];
          if (!LeaderId)
            LeaderId = RunningDepId++;
          DepId = LeaderId;
        } else
          // Each access has its own dependence set.
          DepId = RunningDepId++;

        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap);

        DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
      } else {
        DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
        CanDoRT = false;
      }
    }

    // If we have at least two writes or one write and a read then we need to
    // check them. But there is no need for checks if there is only one
    // dependence set for this alias set.
    //
    // Note that this function computes CanDoRT and NeedRTCheck independently.
    // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer
    // for which we couldn't find the bounds but we don't actually need to emit
    // any checks so it does not matter.
    if (!(IsDepCheckNeeded && CanDoRT && RunningDepId == 2))
      NeedRTCheck |= (NumWritePtrChecks >= 2 || (NumReadPtrChecks >= 1 &&
                                                 NumWritePtrChecks >= 1));

    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        DEBUG(dbgs() << "LAA: Runtime check would require comparison between"
                        " different address spaces\n");
        return false;
      }
    }
  }

  if (NeedRTCheck && CanDoRT)
    RtCheck.groupChecks(DepCands, IsDepCheckNeeded);

  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr)
               << " pointer comparisons.\n");

  RtCheck.Need = NeedRTCheck;

  bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  DEBUG(dbgs() << "  AST: "; AST.dump());
  DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                                         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (auto AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (auto AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write - check other reads and writes for conflicts.
          // If this is a read only check other writes for conflicts (but only
          // if there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.insert(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<Value *, 16> ValueVector;
          ValueVector TempObjects;

          GetUnderlyingObjects(Ptr, TempObjects, DL, LI);
          DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");
          for (Value *UnderlyingObj : TempObjects) {
            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// \brief Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
                           ScalarEvolution *SE, const Loop *L) {
  // FIXME: This should probably only return true for NUW.
  if (AR->getNoWrapFlags(SCEV::NoWrapMask))
    return true;

  // Scalar evolution does not propagate the non-wrapping flags to values that
  // are derived from a non-wrapping induction variable because non-wrapping
  // could be flow-sensitive.
  //
  // Look through the potentially overflowing instruction to try to prove
  // non-wrapping for the *specific* value of Ptr.

  // The arithmetic implied by an inbounds GEP can't overflow.
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP || !GEP->isInBounds())
    return false;

  // Make sure there is only one non-const index and analyze that.
  Value *NonConstIndex = nullptr;
  for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
    if (!isa<ConstantInt>(*Index)) {
      if (NonConstIndex)
        return false;
      NonConstIndex = *Index;
    }
  if (!NonConstIndex)
    // The recurrence is on the pointer, ignore for now.
    return false;

  // The index in GEP is signed. It is non-wrapping if it's derived from a NSW
  // AddRec using a NSW operation.
  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
    if (OBO->hasNoSignedWrap() &&
        // Assume a constant for the other operand so that the AddRec can be
        // easily found.
        isa<ConstantInt>(OBO->getOperand(1))) {
      auto *OpScev = SE->getSCEV(OBO->getOperand(0));

      if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
        return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
    }

  return false;
}

/// \brief Check whether the access through \p Ptr has a constant stride.
int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,
                       const ValueToValueMap &StridesMap) {
  const Type *Ty = Ptr->getType();
  assert(Ty->isPointerTy() && "Unexpected non-ptr");

  // Make sure that the pointer does not point to aggregate types.
  const PointerType *PtrTy = cast<PointerType>(Ty);
  if (PtrTy->getElementType()->isAggregateType()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
                 << *Ptr << "\n");
    return 0;
  }

  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr);

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
  if (!AR) {
    DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer "
                 << *Ptr << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // The access function must stride over the innermost loop.
  if (Lp != AR->getLoop()) {
    DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
          *Ptr << " SCEV: " << *PtrScev << "\n");
  }

  // The address calculation must not wrap. Otherwise, a dependence could be
  // inverted.
  // An inbounds getelementptr that is an AddRec with a unit stride
  // cannot wrap per definition. The unit stride requirement is checked later.
  // A getelementptr without an inbounds attribute and unit stride would have
  // to access the pointer value "0" which is undefined behavior in address
  // space 0, therefore we can also vectorize this case.
  bool IsInBoundsGEP = isInBoundsGep(Ptr);
  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp);
  bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
  if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
    DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
                 << *Ptr << " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  // Check the step is constant.
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Calculate the pointer stride and check if it is constant.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C) {
    DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr <<
          " SCEV: " << *PtrScev << "\n");
    return 0;
  }

  auto &DL = Lp->getHeader()->getModule()->getDataLayout();
  int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
  const APInt &APStepVal = C->getValue()->getValue();

  // Huge step value - give up.
  if (APStepVal.getBitWidth() > 64)
    return 0;

  int64_t StepVal = APStepVal.getSExtValue();

  // Strided access.
  int64_t Stride = StepVal / Size;
  int64_t Rem = StepVal % Size;
  if (Rem)
    return 0;

  // If the SCEV could wrap but we have an inbounds gep with a unit stride we
  // know we can't "wrap around the address space". In case of address space
  // zero we know that this won't happen without triggering undefined behavior.
  if (!IsNoWrapAddRec && (IsInBoundsGEP || IsInAddressSpaceZero) &&
      Stride != 1 && Stride != -1)
    return 0;

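  // For example, for i32 accesses (Size == 4) an AddRec step of 8 gives
  // Stride == 2, whereas a step of 6 leaves a non-zero remainder and was
  // rejected above.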
  return Stride;
}

bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return true;

  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
    return false;

  case BackwardVectorizable:
  case Unknown:
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
    return false;

  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

bool MemoryDepChecker::couldPreventStoreLoadForward(unsigned Distance,
                                                    unsigned TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause trouble because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  //   hence on your typical architecture store-load forwarding does not take
  //   place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.
  const unsigned NumCyclesForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  unsigned MaxVFWithoutSLForwardIssues =
      VectorizerParams::MaxVectorWidth * TypeByteSize;
  if (MaxSafeDepDistBytes < MaxVFWithoutSLForwardIssues)
    MaxVFWithoutSLForwardIssues = MaxSafeDepDistBytes;

  for (unsigned vf = 2 * TypeByteSize; vf <= MaxVFWithoutSLForwardIssues;
       vf *= 2) {
    if (Distance % vf && Distance / vf < NumCyclesForStoreLoadThroughMemory) {
      MaxVFWithoutSLForwardIssues = (vf >>= 1);
      break;
    }
  }
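
  // For example, with TypeByteSize == 4 and Distance == 12 the first vf
  // tried is 8; 12 is not a multiple of 8 and 12 / 8 is far below the
  // forwarding latency, so MaxVFWithoutSLForwardIssues drops to 4, which is
  // below 2 * TypeByteSize and is reported as a conflict below.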

  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    DEBUG(dbgs() << "LAA: Distance " << Distance <<
          " that could cause a store-load forwarding conflict\n");
    return true;
  }

  if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

/// \brief Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(unsigned Distance, unsigned Stride,
                                          unsigned TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not a multiple of the type byte size.
  if (Distance % TypeByteSize)
    return false;

  unsigned ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not a multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx,
                              const ValueToValueMap &Strides) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  Value *APtr = A.getPointer();
  Value *BPtr = B.getPointer();
  bool AIsWrite = A.getInt();
  bool BIsWrite = B.getInt();

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return Dependence::NoDep;

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return Dependence::Unknown;

  const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr);
  const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr);

  int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides);
  int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides);

  const SCEV *Src = AScev;
  const SCEV *Sink = BScev;

  // If the induction step is negative we have to invert source and sink of the
  // dependence.
  if (StrideAPtr < 0) {
    //Src = BScev;
    //Sink = AScev;
    std::swap(APtr, BPtr);
    std::swap(Src, Sink);
    std::swap(AIsWrite, BIsWrite);
    std::swap(AIdx, BIdx);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = SE->getMinusSCEV(Sink, Src);

  DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
               << "(Induction step: " << StrideAPtr << ")\n");
  DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
               << *InstMap[BIdx] << ": " << *Dist << "\n");

  // Need accesses with constant stride. We don't want to vectorize
  // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in
  // the address space.
  if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
    DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return Dependence::Unknown;
  }

  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
  if (!C) {
    DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
    ShouldRetryWithRuntimeCheck = true;
    return Dependence::Unknown;
  }

  Type *ATy = APtr->getType()->getPointerElementType();
  Type *BTy = BPtr->getType()->getPointerElementType();
  auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
  unsigned TypeByteSize = DL.getTypeAllocSize(ATy);

  // Negative distances are not plausible dependencies.
  const APInt &Val = C->getValue()->getValue();
  if (Val.isNegative()) {
    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    if (IsTrueDataDependence &&
        (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
         ATy != BTy))
      return Dependence::ForwardButPreventsForwarding;

    DEBUG(dbgs() << "LAA: Dependence is negative: NoDep\n");
    return Dependence::Forward;
  }

  // Write to the same location with the same size.
  // Could be improved to assert type sizes are the same (i32 == float, etc).
  if (Val == 0) {
    if (ATy == BTy)
      return Dependence::NoDep;
    DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");
    return Dependence::Unknown;
  }

  assert(Val.isStrictlyPositive() && "Expect a positive value");

  if (ATy != BTy) {
    DEBUG(dbgs() <<
          "LAA: ReadWrite-Write positive dependency with different types\n");
    return Dependence::Unknown;
  }

  unsigned Distance = (unsigned) Val.getZExtValue();

  unsigned Stride = std::abs(StrideAPtr);
  if (Stride > 1 &&
      areStridedAccessesIndependent(Distance, Stride, TypeByteSize)) {
    DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
    return Dependence::NoDep;
  }

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (no need to add the last gap distance).
  //
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      foo(int *A) {
  //        int *B = (int *)((char *)A + 14);
  //        for (i = 0 ; i < 1024 ; i += 2)
  //          B[i] = A[i] + 1;
  //      }
  //
  // Two accesses in memory (stride is 2):
  //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
  //                              | B[0] |      | B[2] |      | B[4] |
  //
  // Distance needed for vectorizing iterations except the last iteration:
  // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
  // 12, which is less than distance.
  //
  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than distance. It is
  // not safe to do vectorization.
  unsigned MinDistanceNeeded =
      TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
  if (MinDistanceNeeded > Distance) {
    DEBUG(dbgs() << "LAA: Failure because of positive distance " << Distance
                 << '\n');
    return Dependence::Backward;
  }

  // Unsafe if the minimum distance needed is greater than max safe distance.
  if (MinDistanceNeeded > MaxSafeDepDistBytes) {
    DEBUG(dbgs() << "LAA: Failure because it needs at least "
                 << MinDistanceNeeded << " size in bytes");
    return Dependence::Backward;
  }

  // Positive distance bigger than max vectorization factor.
  // FIXME: Should use max factor instead of max distance in bytes, which could
  // not handle different types.
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      void foo (int *A, char *B) {
  //        for (unsigned i = 0; i < 1024; i++) {
  //          A[i+2] = A[i] + 1;
  //          B[i+2] = B[i] + 1;
  //        }
  //      }
  //
  // This case is currently unsafe according to the max safe distance. If we
  // analyze the two accesses on array B, the max safe dependence distance
  // is 2. Then we analyze the accesses on array A, for which the minimum
  // distance needed is 8, which is greater than 2 and forbids vectorization.
  // But actually both A and B could be vectorized by 2 iterations.
  MaxSafeDepDistBytes =
      Distance < MaxSafeDepDistBytes ? Distance : MaxSafeDepDistBytes;

  bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
  if (IsTrueDataDependence &&
      couldPreventStoreLoadForward(Distance, TypeByteSize))
    return Dependence::BackwardVectorizableButPreventsForwarding;

  DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
               << " with max VF = "
               << MaxSafeDepDistBytes / (TypeByteSize * Stride) << '\n');

  return Dependence::BackwardVectorizable;
}

bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
                                   MemAccessInfoSet &CheckDeps,
                                   const ValueToValueMap &Strides) {

  MaxSafeDepDistBytes = -1U;
  while (!CheckDeps.empty()) {
    MemAccessInfo CurAccess = *CheckDeps.begin();

    // Get the relevant memory access set.
    EquivalenceClasses<MemAccessInfo>::iterator I =
        AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));

    // Check accesses within this set.
    EquivalenceClasses<MemAccessInfo>::member_iterator AI, AE;
    AI = AccessSets.member_begin(I), AE = AccessSets.member_end();

    // Check every access pair.
    while (AI != AE) {
      CheckDeps.erase(*AI);
      EquivalenceClasses<MemAccessInfo>::member_iterator OI = std::next(AI);
      while (OI != AE) {
        // Check every accessing instruction pair in program order.
        for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
             I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
          for (std::vector<unsigned>::iterator I2 = Accesses[*OI].begin(),
               I2E = Accesses[*OI].end(); I2 != I2E; ++I2) {
            auto A = std::make_pair(&*AI, *I1);
            auto B = std::make_pair(&*OI, *I2);

            assert(*I1 != *I2);
            if (*I1 > *I2)
              std::swap(A, B);

            Dependence::DepType Type =
                isDependent(*A.first, A.second, *B.first, B.second, Strides);
            SafeForVectorization &= Dependence::isSafeForVectorization(Type);

            // Gather dependences unless we accumulated MaxInterestingDependence
            // dependences. In that case return as soon as we find the first
            // unsafe dependence. This puts a limit on this quadratic
            // algorithm.
            if (RecordInterestingDependences) {
              if (Dependence::isInterestingDependence(Type))
                InterestingDependences.push_back(
                    Dependence(A.second, B.second, Type));

              if (InterestingDependences.size() >= MaxInterestingDependence) {
                RecordInterestingDependences = false;
                InterestingDependences.clear();
                DEBUG(dbgs() << "Too many dependences, stopped recording\n");
              }
            }
            if (!RecordInterestingDependences && !SafeForVectorization)
              return false;
          }
        ++OI;
      }
      AI++;
    }
  }

  DEBUG(dbgs() << "Total Interesting Dependences: "
               << InterestingDependences.size() << "\n");
  return SafeForVectorization;
}

SmallVector<Instruction *, 4>
MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
  MemAccessInfo Access(Ptr, isWrite);
  auto &IndexVector = Accesses.find(Access)->second;

  SmallVector<Instruction *, 4> Insts;
  std::transform(IndexVector.begin(), IndexVector.end(),
                 std::back_inserter(Insts),
                 [&](unsigned Idx) { return this->InstMap[Idx]; });
  return Insts;
}

const char *MemoryDepChecker::Dependence::DepName[] = {
    "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
    "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};

void MemoryDepChecker::Dependence::print(
    raw_ostream &OS, unsigned Depth,
    const SmallVectorImpl<Instruction *> &Instrs) const {
  OS.indent(Depth) << DepName[Type] << ":\n";
  OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
  OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
}

bool LoopAccessInfo::canAnalyzeLoop() {
  // We need to have a loop header.
  DEBUG(dbgs() << "LAA: Found a loop: " <<
        TheLoop->getHeader()->getName() << '\n');

  // We can only analyze innermost loops.
  if (!TheLoop->empty()) {
    DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
    emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
    return false;
  }

  // We must have a single backedge.
  if (TheLoop->getNumBackEdges() != 1) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // We must have a single exiting block.
  if (!TheLoop->getExitingBlock()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // We only handle bottom-tested loops, i.e. loops in which the condition is
  // checked at the end of each iteration. With that we can assume that all
  // instructions in the loop are executed the same number of times.
  if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
    DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
    emitAnalysis(
        LoopAccessReport() <<
        "loop control flow is not understood by analyzer");
    return false;
  }

  // ScalarEvolution needs to be able to find the exit count.
  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop);
  if (ExitCount == SE->getCouldNotCompute()) {
    emitAnalysis(LoopAccessReport() <<
                 "could not determine number of loop iterations");
    DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
    return false;
  }

  return true;
}

void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {

  typedef SmallVector<Value*, 16> ValueVector;
  typedef SmallPtrSet<Value*, 16> ValueSet;

  // Holds the Load and Store *instructions*.
  ValueVector Loads;
  ValueVector Stores;

  // Holds all the different accesses in the loop.
  unsigned NumReads = 0;
  unsigned NumReadWrites = 0;

  PtrRtChecking.Pointers.clear();
  PtrRtChecking.Need = false;

  const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();

  // For each block.
  for (Loop::block_iterator bb = TheLoop->block_begin(),
       be = TheLoop->block_end(); bb != be; ++bb) {

    // Scan the BB and collect legal loads and stores.
    for (BasicBlock::iterator it = (*bb)->begin(), e = (*bb)->end(); it != e;
         ++it) {

      // If this is a load, save it. If this instruction can read from memory
      // but is not a load, then we quit. Notice that we don't handle function
      // calls that read or write.
      if (it->mayReadFromMemory()) {
        // Many math library functions read the rounding mode. We will only
        // vectorize a loop if it contains known function calls that don't set
        // the flag. Therefore, it is safe to ignore this read from memory.
        CallInst *Call = dyn_cast<CallInst>(it);
        if (Call && getIntrinsicIDForCall(Call, TLI))
          continue;

        // If the function has an explicit vectorized counterpart, we can safely
        // assume that it can be vectorized.
        if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
            TLI->isFunctionVectorizable(Call->getCalledFunction()->getName()))
          continue;

        LoadInst *Ld = dyn_cast<LoadInst>(it);
        if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
          emitAnalysis(LoopAccessReport(Ld)
                       << "read with atomic ordering or volatile read");
          DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
          CanVecMem = false;
          return;
        }
        NumLoads++;
        Loads.push_back(Ld);
        DepChecker.addAccess(Ld);
        continue;
      }

      // Save 'store' instructions. Abort if other instructions write to memory.
      if (it->mayWriteToMemory()) {
        StoreInst *St = dyn_cast<StoreInst>(it);
        if (!St) {
          emitAnalysis(LoopAccessReport(it) <<
                       "instruction cannot be vectorized");
          CanVecMem = false;
          return;
        }
        if (!St->isSimple() && !IsAnnotatedParallel) {
          emitAnalysis(LoopAccessReport(St)
                       << "write with atomic ordering or volatile write");
          DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
          CanVecMem = false;
          return;
        }
        NumStores++;
        Stores.push_back(St);
        DepChecker.addAccess(St);
      }
    } // Next instr.
  } // Next block.

  // Now we have two lists that hold the loads and the stores.
  // Next, we find the pointers that they use.

  // Check if we see any stores. If there are no stores, then we don't
  // care if the pointers are *restrict*.
  if (!Stores.size()) {
    DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
    CanVecMem = true;
    return;
  }

  MemoryDepChecker::DepCandidates DependentAccesses;
  AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(),
                          AA, LI, DependentAccesses);

  // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
  // multiple times on the same object. If the ptr is accessed twice, once
  // for read and once for write, it will only appear once (on the write
  // list). This is okay, since we are going to check for conflicts between
  // writes and between reads and writes, but not between reads and reads.
  ValueSet Seen;

  ValueVector::iterator I, IE;
  for (I = Stores.begin(), IE = Stores.end(); I != IE; ++I) {
    StoreInst *ST = cast<StoreInst>(*I);
    Value* Ptr = ST->getPointerOperand();
    // Check for store to loop invariant address.
    StoreToLoopInvariantAddress |= isUniform(Ptr);
    // If we did *not* see this pointer before, insert it to the read-write
    // list. At this phase it is only a 'write' list.
    if (Seen.insert(Ptr).second) {
      ++NumReadWrites;

      MemoryLocation Loc = MemoryLocation::get(ST);

      // The TBAA metadata could have a control dependency on the predication
      // condition, so we cannot rely on it when determining whether or not we
      // need runtime pointer checks.
      if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
        Loc.AATags.TBAA = nullptr;

      Accesses.addStore(Loc);
    }
  }

  if (IsAnnotatedParallel) {
    DEBUG(dbgs()
          << "LAA: A loop annotated parallel, ignore memory dependency "
          << "checks.\n");
    CanVecMem = true;
    return;
  }

  for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
    LoadInst *LD = cast<LoadInst>(*I);
    Value* Ptr = LD->getPointerOperand();
    // If we did *not* see this pointer before, insert it to the
    // read list. If we *did* see it before, then it is already in
    // the read-write list. This allows us to vectorize expressions
    // such as A[i] += x;  Because the address of A[i] is a read-write
    // pointer. This only works if the index of A[i] is consecutive.
    // If the address of i is unknown (for example A[B[i]]) then we may
    // read a few words, modify, and write a few words, and some of the
    // words may be written to the same address.
    bool IsReadOnlyPtr = false;
    if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) {
      ++NumReads;
      IsReadOnlyPtr = true;
    }

    MemoryLocation Loc = MemoryLocation::get(LD);

    // The TBAA metadata could have a control dependency on the predication
    // condition, so we cannot rely on it when determining whether or not we
    // need runtime pointer checks.
    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
      Loc.AATags.TBAA = nullptr;

    Accesses.addLoad(Loc, IsReadOnlyPtr);
  }

  // If we write (or read-write) to a single destination and there are no
  // other reads in this loop then it is safe to vectorize.
  if (NumReadWrites == 1 && NumReads == 0) {
    DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
    CanVecMem = true;
    return;
  }

  // Build dependence sets and check whether we need a runtime pointer bounds
  // check.
  Accesses.buildDependenceSets();

  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRTIfNeeded =
      Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides);
  if (!CanDoRTIfNeeded) {
    emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
    DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
                 << "the array bounds.\n");
    CanVecMem = false;
    return;
  }

  DEBUG(dbgs() << "LAA: We can perform a memory runtime check if needed.\n");

  CanVecMem = true;
  if (Accesses.isDependencyCheckNeeded()) {
    DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
    CanVecMem = DepChecker.areDepsSafe(
        DependentAccesses, Accesses.getDependenciesToCheck(), Strides);
    MaxSafeDepDistBytes = DepChecker.getMaxSafeDepDistBytes();

    if (!CanVecMem && DepChecker.shouldRetryWithRuntimeCheck()) {
      DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

      // Clear the dependency checks. We assume they are not needed.
      Accesses.resetDepChecks(DepChecker);

      PtrRtChecking.reset();
      PtrRtChecking.Need = true;

      CanDoRTIfNeeded =
          Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true);

      // Check that we found the bounds for the pointer.
      if (!CanDoRTIfNeeded) {
        emitAnalysis(LoopAccessReport()
                     << "cannot check memory dependencies at runtime");
        DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
        CanVecMem = false;
        return;
      }

      CanVecMem = true;
    }
  }

  if (CanVecMem)
    DEBUG(dbgs() << "LAA: No unsafe dependent memory operations in loop. We"
                 << (PtrRtChecking.Need ? "" : " don't")
                 << " need runtime memory checks.\n");
  else {
    emitAnalysis(LoopAccessReport() <<
                 "unsafe dependent memory operations in loop");
    DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
  }
}

bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                           DominatorTree *DT) {
  assert(TheLoop->contains(BB) && "Unknown block used");

  // Blocks that do not dominate the latch need predication.
  BasicBlock* Latch = TheLoop->getLoopLatch();
  return !DT->dominates(BB, Latch);
}

void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
  assert(!Report && "Multiple reports generated");
  Report = Message;
}

bool LoopAccessInfo::isUniform(Value *V) const {
  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}

// FIXME: this function is currently a duplicate of the one in
// LoopVectorize.cpp.
static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
                                 Instruction *Loc) {
  if (FirstInst)
    return FirstInst;
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() == Loc->getParent() ? I : nullptr;
  return nullptr;
}
1569 std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(
1570  Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const {
1571  if (!PtrRtChecking.Need)
1572  return std::make_pair(nullptr, nullptr);
1573 
1574  SmallVector<TrackingVH<Value>, 2> Starts;
1575  SmallVector<TrackingVH<Value>, 2> Ends;
1576 
1577  LLVMContext &Ctx = Loc->getContext();
1578  SCEVExpander Exp(*SE, DL, "induction");
1579  Instruction *FirstInst = nullptr;
1580 
1581  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
1582  const RuntimePointerChecking::CheckingPtrGroup &CG =
1583  PtrRtChecking.CheckingGroups[i];
1584  Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue;
1585  const SCEV *Sc = SE->getSCEV(Ptr);
1586 
1587  if (SE->isLoopInvariant(Sc, TheLoop)) {
1588  DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
1589  << "\n");
1590  Starts.push_back(Ptr);
1591  Ends.push_back(Ptr);
1592  } else {
1593  unsigned AS = Ptr->getType()->getPointerAddressSpace();
1594 
1595  // Use this type for pointer arithmetic.
1596  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
1597  Value *Start = nullptr, *End = nullptr;
1598 
1599  DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
1600  Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc);
1601  End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc);
1602  DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n");
1603  Starts.push_back(Start);
1604  Ends.push_back(End);
1605  }
1606  }
1607 
1608  IRBuilder<> ChkBuilder(Loc);
1609  // Our instructions might fold to a constant.
1610  Value *MemoryRuntimeCheck = nullptr;
1611  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) {
1612  for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) {
1613  const RuntimePointerChecking::CheckingPtrGroup &CGI =
1614  PtrRtChecking.CheckingGroups[i];
1615  const RuntimePointerChecking::CheckingPtrGroup &CGJ =
1616  PtrRtChecking.CheckingGroups[j];
1617 
1618  if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition))
1619  continue;
1620 
1621  unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace();
1622  unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace();
1623 
1624  assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) &&
1625  (AS1 == Ends[i]->getType()->getPointerAddressSpace()) &&
1626  "Trying to bounds check pointers with different address spaces");
1627 
1628  Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0);
1629  Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1);
1630 
1631  Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc");
1632  Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc");
1633  Value *End0 = ChkBuilder.CreateBitCast(Ends[i], PtrArithTy1, "bc");
1634  Value *End1 = ChkBuilder.CreateBitCast(Ends[j], PtrArithTy0, "bc");
1635 
1636  Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0");
1637  FirstInst = getFirstInst(FirstInst, Cmp0, Loc);
1638  Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1");
1639  FirstInst = getFirstInst(FirstInst, Cmp1, Loc);
1640  Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict");
1641  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
1642  if (MemoryRuntimeCheck) {
1643  IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict,
1644  "conflict.rdx");
1645  FirstInst = getFirstInst(FirstInst, IsConflict, Loc);
1646  }
1647  MemoryRuntimeCheck = IsConflict;
1648  }
1649  }
1650 
1651  if (!MemoryRuntimeCheck)
1652  return std::make_pair(nullptr, nullptr);
1653 
1654  // We have to do this trickery because the IRBuilder might fold the check to a
1655  // constant expression, in which case there is no Instruction anchored in
1656  // the block.
1657  Instruction *Check = BinaryOperator::CreateAnd(MemoryRuntimeCheck,
1658  ConstantInt::getTrue(Ctx));
1659  ChkBuilder.Insert(Check, "memcheck.conflict");
1660  FirstInst = getFirstInst(FirstInst, Check, Loc);
1661  return std::make_pair(FirstInst, Check);
1662 }
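// [Editor's note: illustrative sketch, not part of the original file.]
// The predicate emitted above is the standard interval-overlap test,
// OR-reduced across every pair of checking groups. In C form, with each
// group covering the unsigned byte range [Start, End]:
//
//   bool Conflict = (Start0 <= End1) & (Start1 <= End0);  // "found.conflict"
//   AnyConflict = AnyConflict | Conflict;                 // "conflict.rdx"
//
// A conflict is reported unless one range starts strictly past the
// other's end, so the vectorized version may run only if AnyConflict is
// false.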
1663 
1664 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
1665  const DataLayout &DL,
1666  const TargetLibraryInfo *TLI, AliasAnalysis *AA,
1667  DominatorTree *DT, LoopInfo *LI,
1668  const ValueToValueMap &Strides)
1669  : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL),
1670  TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
1671  MaxSafeDepDistBytes(-1U), CanVecMem(false),
1672  StoreToLoopInvariantAddress(false) {
1673  if (canAnalyzeLoop())
1674  analyzeLoop(Strides);
1675 }
1676 
1677 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
1678  if (CanVecMem) {
1679  if (PtrRtChecking.Need)
1680  OS.indent(Depth) << "Memory dependences are safe with run-time checks\n";
1681  else
1682  OS.indent(Depth) << "Memory dependences are safe\n";
1683  }
1684 
1685  if (Report)
1686  OS.indent(Depth) << "Report: " << Report->str() << "\n";
1687 
1688  if (auto *InterestingDependences = DepChecker.getInterestingDependences()) {
1689  OS.indent(Depth) << "Interesting Dependences:\n";
1690  for (auto &Dep : *InterestingDependences) {
1691  Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());
1692  OS << "\n";
1693  }
1694  } else
1695  OS.indent(Depth) << "Too many interesting dependences, not recorded\n";
1696 
1697  // List the pairs of accesses that need run-time checks to prove independence.
1698  PtrRtChecking.print(OS, Depth);
1699  OS << "\n";
1700 
1701  OS.indent(Depth) << "Store to invariant address was "
1702  << (StoreToLoopInvariantAddress ? "" : "not ")
1703  << "found in loop.\n";
1704 }
1705 
1706 const LoopAccessInfo &
1708  auto &LAI = LoopAccessInfoMap[L];
1709 
1710 #ifndef NDEBUG
1711  assert((!LAI || LAI->NumSymbolicStrides == Strides.size()) &&
1712  "Symbolic strides changed for loop");
1713 #endif
1714 
1715  if (!LAI) {
1716  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
1717  LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
1718  Strides);
1719 #ifndef NDEBUG
1720  LAI->NumSymbolicStrides = Strides.size();
1721 #endif
1722  }
1723  return *LAI.get();
1724 }
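// [Editor's note: minimal usage sketch, not part of the original file;
// it assumes a legacy pass that declared LoopAccessAnalysis as required,
// a loop L with no symbolic strides, and the accessor names as they
// appear in the 3.7 headers.]
//
//   auto &LAA = getAnalysis<LoopAccessAnalysis>();
//   ValueToValueMap NoSymbolicStrides;
//   const LoopAccessInfo &LAI = LAA.getInfo(L, NoSymbolicStrides);
//   if (LAI.canVectorizeMemory() && !LAI.getRuntimePointerChecking()->Need)
//     ; // memory is provably safe without emitting any runtime checks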
1725 
1726 void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {
1727  LoopAccessAnalysis &LAA = *const_cast<LoopAccessAnalysis *>(this);
1728 
1729  ValueToValueMap NoSymbolicStrides;
1730 
1731  for (Loop *TopLevelLoop : *LI)
1732  for (Loop *L : depth_first(TopLevelLoop)) {
1733  OS.indent(2) << L->getHeader()->getName() << ":\n";
1734  auto &LAI = LAA.getInfo(L, NoSymbolicStrides);
1735  LAI.print(OS, 4);
1736  }
1737 }
1738 
1740  SE = &getAnalysis<ScalarEvolution>();
1741  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
1742  TLI = TLIP ? &TLIP->getTLI() : nullptr;
1743  AA = &getAnalysis<AliasAnalysis>();
1744  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1745  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
1746 
1747  return false;
1748 }
1749 
1750 void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
1751  AU.addRequired<ScalarEvolution>();
1752  AU.addRequired<AliasAnalysis>();
1753  AU.addRequired<DominatorTreeWrapperPass>();
1754  AU.addRequired<LoopInfoWrapperPass>();
1755 
1756  AU.setPreservesAll();
1757 }
1758 
1759 char LoopAccessAnalysis::ID = 0;
1760 static const char laa_name[] = "Loop Access Analysis";
1761 #define LAA_NAME "loop-accesses"
1762 
1763 INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
1764 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
1765 INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
1766 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1767 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
1768 INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true)
1769 
1770 namespace llvm {
1771  Pass *createLAAPass() {
1772  return new LoopAccessAnalysis();
1773  }
1774 }
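// [Editor's note: not part of the original file.] The pass registered
// above can be exercised directly from the command line; opt's -analyze
// mode drives the print() method defined earlier:
//
//   opt -loop-accesses -analyze input.ll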