LLVM 17.0.0git
LoopAccessAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface for the loop memory dependence framework that
10// was originally developed for the Loop Vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
15#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
16
21#include "llvm/Pass.h"
22#include <optional>
23
24namespace llvm {
25
26class AAResults;
27class DataLayout;
28class Loop;
29class LoopAccessInfo;
30class raw_ostream;
31class SCEV;
32class SCEVUnionPredicate;
33class Value;
34
35/// Collection of parameters shared between the Loop Vectorizer and the
36/// Loop Access Analysis.
38 /// Maximum SIMD width.
39 static const unsigned MaxVectorWidth;
40
41 /// VF as overridden by the user.
42 static unsigned VectorizationFactor;
43 /// Interleave factor as overridden by the user.
44 static unsigned VectorizationInterleave;
45 /// True if force-vector-interleave was specified by the user.
46 static bool isInterleaveForced();
47
48 /// When performing memory disambiguation checks at runtime do not
49 /// make more than this number of comparisons.
51};
52
53/// Checks memory dependences among accesses to the same underlying
54/// object to determine whether vectorization is legal or not (and at
55/// which vectorization factor).
56///
57/// Note: This class will compute a conservative dependence for access to
58/// different underlying pointers. Clients, such as the loop vectorizer, will
59/// sometimes deal with these potential dependencies by emitting runtime checks.
60///
61/// We use the ScalarEvolution framework to symbolically evaluate access
62/// function pairs. Since we currently don't restructure the loop we can rely
63/// on the program order of memory accesses to determine their safety.
64/// At the moment we will only deem accesses as safe for:
65/// * A negative constant distance assuming program order.
66///
67/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
68/// a[i] = tmp; y = a[i];
69///
70/// The latter case is safe because later checks guarantee that there can't
71/// be a cycle through a phi node (that is, we check that "x" and "y" are not
72/// the same variable: a header phi can only be an induction or a reduction, a
73/// reduction can't have a memory sink, an induction can't have a memory
74/// source). This is important and must not be violated (or we have to
75/// resort to checking for cycles through memory).
76///
77/// * A positive constant distance assuming program order that is bigger
78/// than the biggest memory access.
79///
80/// tmp = a[i] OR b[i] = x
81/// a[i+2] = tmp y = b[i+2];
82///
83/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
84///
85/// * Zero distances and all accesses have the same size.
86///
88public:
91 /// Set of potential dependent memory accesses.
93
94 /// Type to keep track of the status of the dependence check. The order of
95 /// the elements is important and has to be from most permissive to least
96 /// permissive.
98 // Can vectorize safely without RT checks. All dependences are known to be
99 // safe.
100 Safe,
101 // Can possibly vectorize with RT checks to overcome unknown dependencies.
103 // Cannot vectorize due to known unsafe dependencies.
104 Unsafe,
105 };
106
107 /// Dependence between memory access instructions.
108 struct Dependence {
109 /// The type of the dependence.
110 enum DepType {
111 // No dependence.
113 // We couldn't determine the direction or the distance.
115 // Lexically forward.
116 //
117 // FIXME: If we only have loop-independent forward dependences (e.g. a
118 // read and write of A[i]), LAA will locally deem the dependence "safe"
119 // without querying the MemoryDepChecker. Therefore we can miss
120 // enumerating loop-independent forward dependences in
121 // getDependences. Note that as soon as there are different
122 // indices used to access the same array, the MemoryDepChecker *is*
123 // queried and the dependence list is complete.
125 // Forward, but if vectorized, is likely to prevent store-to-load
126 // forwarding.
128 // Lexically backward.
130 // Backward, but the distance allows a vectorization factor of
131 // MaxSafeDepDistBytes.
133 // Same, but may prevent store-to-load forwarding.
135 };
136
137 /// String version of the types.
138 static const char *DepName[];
139
140 /// Index of the source of the dependence in the InstMap vector.
141 unsigned Source;
142 /// Index of the destination of the dependence in the InstMap vector.
143 unsigned Destination;
144 /// The type of the dependence.
146
149
150 /// Return the source instruction of the dependence.
151 Instruction *getSource(const LoopAccessInfo &LAI) const;
152 /// Return the destination instruction of the dependence.
153 Instruction *getDestination(const LoopAccessInfo &LAI) const;
154
155 /// Dependence types that don't prevent vectorization.
157
158 /// Lexically forward dependence.
159 bool isForward() const;
160 /// Lexically backward dependence.
161 bool isBackward() const;
162
163 /// May be a lexically backward dependence type (includes Unknown).
164 bool isPossiblyBackward() const;
165
166 /// Print the dependence. \p Instr is used to map the instruction
167 /// indices to instructions.
168 void print(raw_ostream &OS, unsigned Depth,
169 const SmallVectorImpl<Instruction *> &Instrs) const;
170 };
171
173 : PSE(PSE), InnermostLoop(L) {}
174
175 /// Register the location (instructions are given increasing numbers)
176 /// of a write access.
177 void addAccess(StoreInst *SI);
178
179 /// Register the location (instructions are given increasing numbers)
180 /// of a read access.
181 void addAccess(LoadInst *LI);
182
183 /// Check whether the dependencies between the accesses are safe.
184 ///
185 /// Only checks sets with elements in \p CheckDeps.
186 bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
187 const ValueToValueMap &Strides);
188
189 /// No memory dependence was encountered that would inhibit
190 /// vectorization.
193 }
194
195 /// Return true if the number of elements that are safe to operate on
196 /// simultaneously is not bounded.
198 return MaxSafeVectorWidthInBits == UINT_MAX;
199 }
200
201 /// The maximum number of bytes of a vector register we can vectorize
202 /// the accesses safely with.
203 uint64_t getMaxSafeDepDistBytes() { return MaxSafeDepDistBytes; }
204
205 /// Return the number of elements that are safe to operate on
206 /// simultaneously, multiplied by the size of the element in bits.
208 return MaxSafeVectorWidthInBits;
209 }
210
211 /// In some cases when the dependency check fails we can still
212 /// vectorize the loop with a dynamic array access check.
214 return FoundNonConstantDistanceDependence &&
216 }
217
218 /// Returns the memory dependences. If null is returned we exceeded
219 /// the MaxDependences threshold and this information is not
220 /// available.
222 return RecordDependences ? &Dependences : nullptr;
223 }
224
225 void clearDependences() { Dependences.clear(); }
226
227 /// The vector of memory access instructions. The indices are used as
228 /// instruction identifiers in the Dependence class.
230 return InstMap;
231 }
232
233 /// Generate a mapping between the memory instructions and their
234 /// indices according to program order.
237
238 for (unsigned I = 0; I < InstMap.size(); ++I)
239 OrderMap[InstMap[I]] = I;
240
241 return OrderMap;
242 }
243
244 /// Find the set of instructions that read or write via \p Ptr.
246 bool isWrite) const;
247
248 /// Return the program order indices for the access location (Ptr, IsWrite).
249 /// Returns an empty ArrayRef if there are no accesses for the location.
251 auto I = Accesses.find({Ptr, IsWrite});
252 if (I != Accesses.end())
253 return I->second;
254 return {};
255 }
256
257 const Loop *getInnermostLoop() const { return InnermostLoop; }
258
259private:
260 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
261 /// applies dynamic knowledge to simplify SCEV expressions and convert them
262 /// to a more usable form. We need this in case assumptions about SCEV
263 /// expressions need to be made in order to avoid unknown dependences. For
264 /// example we might assume a unit stride for a pointer in order to prove
265 /// that a memory access is strided and doesn't wrap.
267 const Loop *InnermostLoop;
268
269 /// Maps access locations (ptr, read/write) to program order.
271
272 /// Memory access instructions in program order.
274
275 /// The program order index to be used for the next instruction.
276 unsigned AccessIdx = 0;
277
278 // We can access this many bytes in parallel safely.
279 uint64_t MaxSafeDepDistBytes = 0;
280
281 /// Number of elements (from consecutive iterations) that are safe to
282 /// operate on simultaneously, multiplied by the size of the element in bits.
283 /// The size of the element is taken from the memory access that is most
284 /// restrictive.
285 uint64_t MaxSafeVectorWidthInBits = -1U;
286
287 /// If we see a non-constant dependence distance we can still try to
288 /// vectorize this loop with runtime checks.
289 bool FoundNonConstantDistanceDependence = false;
290
291 /// Result of the dependence checks, indicating whether the checked
292 /// dependences are safe for vectorization, require RT checks or are known to
293 /// be unsafe.
295
296 /// True if Dependences reflects the dependences in the
297 /// loop. If false we exceeded MaxDependences and
298 /// Dependences is invalid.
299 bool RecordDependences = true;
300
301 /// Memory dependences collected during the analysis. Only valid if
302 /// RecordDependences is true.
303 SmallVector<Dependence, 8> Dependences;
304
305 /// Check whether there is a plausible dependence between the two
306 /// accesses.
307 ///
308 /// Access \p A must happen before \p B in program order. The two indices
309 /// identify the index into the program order map.
310 ///
311 /// This function checks whether there is a plausible dependence (or the
312 /// absence of such can't be proved) between the two accesses. If there is a
313 /// plausible dependence but the dependence distance is bigger than one
314 /// element access it records this distance in \p MaxSafeDepDistBytes (if this
315 /// distance is smaller than any other distance encountered so far).
316 /// Otherwise, this function returns true signaling a possible dependence.
317 Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
318 const MemAccessInfo &B, unsigned BIdx,
319 const ValueToValueMap &Strides);
320
321 /// Check whether the data dependence could prevent store-load
322 /// forwarding.
323 ///
324 /// \return false if we shouldn't vectorize at all or avoid larger
325 /// vectorization factors by limiting MaxSafeDepDistBytes.
326 bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
327
328 /// Updates the current safety status with \p S. We can go from Safe to
329 /// either PossiblySafeWithRtChecks or Unsafe and from
330 /// PossiblySafeWithRtChecks to Unsafe.
331 void mergeInStatus(VectorizationSafetyStatus S);
332};
333
334class RuntimePointerChecking;
335/// A grouping of pointers. A single memcheck is required between
336/// two groups.
338 /// Create a new pointer checking group containing a single
339 /// pointer, with index \p Index in RtCheck.
341
342 /// Tries to add the pointer recorded in RtCheck at index
343 /// \p Index to this pointer checking group. We can only add a pointer
344 /// to a checking group if we will still be able to get
345 /// the upper and lower bounds of the check. Returns true in case
346 /// of success, false otherwise.
347 bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
348 bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
349 unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
350
351 /// The SCEV expression which represents the upper bound of all the
352 /// pointers in this group.
353 const SCEV *High;
354 /// The SCEV expression which represents the lower bound of all the
355 /// pointers in this group.
356 const SCEV *Low;
357 /// Indices of all the pointers that constitute this grouping.
359 /// Address space of the involved pointers.
360 unsigned AddressSpace;
361 /// Whether the pointer needs to be frozen after expansion, e.g. because it
362 /// may be poison outside the loop.
363 bool NeedsFreeze = false;
364};
365
366/// A memcheck which is made up of a pair of grouped pointers.
367typedef std::pair<const RuntimeCheckingPtrGroup *,
370
374 unsigned AccessSize;
376
378 unsigned AccessSize, bool NeedsFreeze)
381};
382
383/// Holds information about the memory runtime legality checks to verify
384/// that a group of pointers do not overlap.
387
388public:
389 struct PointerInfo {
390 /// Holds the pointer value that we need to check.
392 /// Holds the smallest byte address accessed by the pointer throughout all
393 /// iterations of the loop.
394 const SCEV *Start;
395 /// Holds the largest byte address accessed by the pointer throughout all
396 /// iterations of the loop, plus 1.
397 const SCEV *End;
398 /// Holds the information if this pointer is used for writing to memory.
400 /// Holds the id of the set of pointers that could be dependent because of a
401 /// shared underlying object.
403 /// Holds the id of the disjoint alias set to which this pointer belongs.
404 unsigned AliasSetId;
405 /// SCEV for the access.
406 const SCEV *Expr;
407 /// True if the pointer expression needs to be frozen after expansion.
409
411 bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
412 const SCEV *Expr, bool NeedsFreeze)
416 };
417
419 : DC(DC), SE(SE) {}
420
421 /// Reset the state of the pointer runtime information.
422 void reset() {
423 Need = false;
424 Pointers.clear();
425 Checks.clear();
426 }
427
428 /// Insert a pointer and calculate the start and end SCEVs.
429 /// We need \p PSE in order to compute the SCEV expression of the pointer
430 /// according to the assumptions that we've made during the analysis.
431 /// The method might also version the pointer stride according to \p Strides,
432 /// and add new predicates to \p PSE.
433 void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
434 bool WritePtr, unsigned DepSetId, unsigned ASId,
435 PredicatedScalarEvolution &PSE, bool NeedsFreeze);
436
437 /// No run-time memory checking is necessary.
438 bool empty() const { return Pointers.empty(); }
439
440 /// Generate the checks and store it. This also performs the grouping
441 /// of pointers to reduce the number of memchecks necessary.
442 void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
443 bool UseDependencies);
444
445 /// Returns the checks that generateChecks created. They can be used to ensure
446 /// no read/write accesses overlap across all loop iterations.
448 return Checks;
449 }
450
451 // Returns an optional list of (pointer-difference expressions, access size)
452 // pairs that can be used to prove that there are no vectorization-preventing
453// dependencies at runtime. There is a vectorization-preventing dependency
454 // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
455 // std::nullopt if pointer-difference checks cannot be used.
456 std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
457 if (!CanUseDiffCheck)
458 return std::nullopt;
459 return {DiffChecks};
460 }
461
462 /// Decide if we need to add a check between two groups of pointers,
463 /// according to needsChecking.
465 const RuntimeCheckingPtrGroup &N) const;
466
467 /// Returns the number of run-time checks required according to
468 /// needsChecking.
469 unsigned getNumberOfChecks() const { return Checks.size(); }
470
471 /// Print the list of run-time memory checks necessary.
472 void print(raw_ostream &OS, unsigned Depth = 0) const;
473
474 /// Print \p Checks.
475 void printChecks(raw_ostream &OS,
477 unsigned Depth = 0) const;
478
479 /// This flag indicates if we need to add the runtime check.
480 bool Need = false;
481
482 /// Information about the pointers that may require checking.
484
485 /// Holds a partitioning of pointers into "check groups".
487
488 /// Check if pointers are in the same partition
489 ///
490 /// \p PtrToPartition contains the partition number for pointers (-1 if the
491 /// pointer belongs to multiple partitions).
492 static bool
494 unsigned PtrIdx1, unsigned PtrIdx2);
495
496 /// Decide whether we need to issue a run-time check for pointers at
497 /// indices \p I and \p J to prove their independence.
498 bool needsChecking(unsigned I, unsigned J) const;
499
500 /// Return PointerInfo for pointer at index \p PtrIdx.
501 const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
502 return Pointers[PtrIdx];
503 }
504
505 ScalarEvolution *getSE() const { return SE; }
506
507private:
508 /// Groups pointers such that a single memcheck is required
509 /// between two different groups. This will clear the CheckingGroups vector
510 /// and re-compute it. We will only group dependencies if \p UseDependencies
511 /// is true, otherwise we will create a separate group for each pointer.
512 void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
513 bool UseDependencies);
514
515 /// Generate the checks and return them.
517
518 /// Try to add a new (pointer-difference, access size) pair to
519 /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
520 /// checks cannot be used for the groups, set CanUseDiffCheck to false.
521 void tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
522 const RuntimeCheckingPtrGroup &CGJ);
523
525
526 /// Holds a pointer to the ScalarEvolution analysis.
527 ScalarEvolution *SE;
528
529 /// Set of run-time checks required to establish independence of
530 /// otherwise may-aliasing pointers in the loop.
532
533 /// Flag indicating if pointer-difference checks can be used
534 bool CanUseDiffCheck = true;
535
536 /// A list of (pointer-difference, access size) pairs that can be used to
537 /// prove that there are no vectorization-preventing dependencies.
539};
540
541/// Drive the analysis of memory accesses in the loop
542///
543/// This class is responsible for analyzing the memory accesses of a loop. It
544/// collects the accesses and then its main helper the AccessAnalysis class
545/// finds and categorizes the dependences in buildDependenceSets.
546///
547/// For memory dependences that can be analyzed at compile time, it determines
548/// whether the dependence is part of cycle inhibiting vectorization. This work
549/// is delegated to the MemoryDepChecker class.
550///
551/// For memory dependences that cannot be determined at compile time, it
552/// generates run-time checks to prove independence. This is done by
553/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
554/// RuntimePointerCheck class.
555///
556/// If pointers can wrap or can't be expressed as affine AddRec expressions by
557/// ScalarEvolution, we will generate run-time checks by emitting a
558/// SCEVUnionPredicate.
559///
560/// Checks for both memory dependences and the SCEV predicates contained in the
561/// PSE must be emitted in order for the results of this analysis to be valid.
563public:
565 AAResults *AA, DominatorTree *DT, LoopInfo *LI);
566
567 /// Return true if we can analyze the memory accesses in the loop and there are
568 /// no memory dependence cycles.
569 bool canVectorizeMemory() const { return CanVecMem; }
570
571 /// Return true if there is a convergent operation in the loop. There may
572 /// still be reported runtime pointer checks that would be required, but it is
573 /// not legal to insert them.
574 bool hasConvergentOp() const { return HasConvergentOp; }
575
577 return PtrRtChecking.get();
578 }
579
580 /// Number of memchecks required to prove independence of otherwise
581 /// may-alias pointers.
582 unsigned getNumRuntimePointerChecks() const {
583 return PtrRtChecking->getNumberOfChecks();
584 }
585
586 /// Return true if the block BB needs to be predicated in order for the loop
587 /// to be vectorized.
588 static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
589 DominatorTree *DT);
590
591 /// Returns true if the value V is uniform within the loop.
592 bool isUniform(Value *V) const;
593
594 uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
595 unsigned getNumStores() const { return NumStores; }
596 unsigned getNumLoads() const { return NumLoads;}
597
598 /// The diagnostics report generated for the analysis. E.g. why we
599 /// couldn't analyze the loop.
600 const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
601
602 /// The Memory Dependence Checker which can determine the
603 /// loop-independent and loop-carried dependences between memory accesses.
604 const MemoryDepChecker &getDepChecker() const { return *DepChecker; }
605
606 /// Return the list of instructions that use \p Ptr to read or write
607 /// memory.
609 bool isWrite) const {
610 return DepChecker->getInstructionsForAccess(Ptr, isWrite);
611 }
612
613 /// If an access has a symbolic stride, this maps the pointer value to
614 /// the stride symbol.
615 const ValueToValueMap &getSymbolicStrides() const { return SymbolicStrides; }
616
617 /// Pointer has a symbolic stride.
618 bool hasStride(Value *V) const { return StrideSet.count(V); }
619
620 /// Print the information about the memory accesses in the loop.
621 void print(raw_ostream &OS, unsigned Depth = 0) const;
622
623 /// If the loop has memory dependence involving an invariant address, i.e. two
624 /// stores or a store and a load, then return true, else return false.
626 return HasDependenceInvolvingLoopInvariantAddress;
627 }
628
629 /// Return the list of stores to invariant addresses.
631 return StoresToInvariantAddresses;
632 }
633
634 /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
635 /// them to a more usable form. All SCEV expressions during the analysis
636 /// should be re-written (and therefore simplified) according to PSE.
637 /// A user of LoopAccessAnalysis will need to emit the runtime checks
638 /// associated with this predicate.
639 const PredicatedScalarEvolution &getPSE() const { return *PSE; }
640
641private:
642 /// Analyze the loop.
643 void analyzeLoop(AAResults *AA, LoopInfo *LI,
644 const TargetLibraryInfo *TLI, DominatorTree *DT);
645
646 /// Check if the structure of the loop allows it to be analyzed by this
647 /// pass.
648 bool canAnalyzeLoop();
649
650 /// Save the analysis remark.
651 ///
652 /// LAA does not directly emit the remarks. Instead it stores them so that the
653 /// client can retrieve and present them as its own analysis
654 /// (e.g. -Rpass-analysis=loop-vectorize).
655 OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
656 Instruction *Instr = nullptr);
657
658 /// Collect memory access with loop invariant strides.
659 ///
660 /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
661 /// invariant.
662 void collectStridedAccess(Value *LoadOrStoreInst);
663
664 // Emits the first unsafe memory dependence in a loop.
665 // Emits nothing if there are no unsafe dependences
666 // or if the dependences were not recorded.
667 void emitUnsafeDependenceRemark();
668
669 std::unique_ptr<PredicatedScalarEvolution> PSE;
670
671 /// We need to check that all of the pointers in this list are disjoint
672 /// at runtime. Using std::unique_ptr to make using move ctor simpler.
673 std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
674
675 /// The Memory Dependence Checker which can determine the
676 /// loop-independent and loop-carried dependences between memory accesses.
677 std::unique_ptr<MemoryDepChecker> DepChecker;
678
679 Loop *TheLoop;
680
681 unsigned NumLoads = 0;
682 unsigned NumStores = 0;
683
684 uint64_t MaxSafeDepDistBytes = -1;
685
686 /// Cache the result of analyzeLoop.
687 bool CanVecMem = false;
688 bool HasConvergentOp = false;
689
690 /// Indicator that there are non vectorizable stores to a uniform address.
691 bool HasDependenceInvolvingLoopInvariantAddress = false;
692
693 /// List of stores to invariant addresses.
694 SmallVector<StoreInst *> StoresToInvariantAddresses;
695
696 /// The diagnostics report generated for the analysis. E.g. why we
697 /// couldn't analyze the loop.
698 std::unique_ptr<OptimizationRemarkAnalysis> Report;
699
700 /// If an access has a symbolic stride, this maps the pointer value to
701 /// the stride symbol.
702 ValueToValueMap SymbolicStrides;
703
704 /// Set of symbolic strides values.
705 SmallPtrSet<Value *, 8> StrideSet;
706};
707
708Value *stripIntegerCast(Value *V);
709
710/// Return the SCEV corresponding to a pointer with the symbolic stride
711/// replaced with constant one, assuming the SCEV predicate associated with
712/// \p PSE is true.
713///
714/// If necessary this method will version the stride of the pointer according
715/// to \p PtrToStride and therefore add further predicates to \p PSE.
716///
717/// \p PtrToStride provides the mapping between the pointer value and its
718/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
719const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
720 const ValueToValueMap &PtrToStride,
721 Value *Ptr);
722
723/// If the pointer has a constant stride return it in units of the access type
724/// size. Otherwise return std::nullopt.
725///
726/// Ensure that it does not wrap in the address space, assuming the predicate
727/// associated with \p PSE is true.
728///
729/// If necessary this method will version the stride of the pointer according
730/// to \p PtrToStride and therefore add further predicates to \p PSE.
731/// The \p Assume parameter indicates if we are allowed to make additional
732/// run-time assumptions.
733std::optional<int64_t>
734getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
735 const Loop *Lp,
736 const ValueToValueMap &StridesMap = ValueToValueMap(),
737 bool Assume = false, bool ShouldCheckWrap = true);
738
739/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
740/// compatible and it is possible to calculate the distance between them. This
741/// is a simple API that does not depend on the analysis pass.
742/// \param StrictCheck Ensure that the calculated distance matches the
743/// type-based one after all the bitcasts removal in the provided pointers.
744std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
745 Value *PtrB, const DataLayout &DL,
746 ScalarEvolution &SE,
747 bool StrictCheck = false,
748 bool CheckType = true);
749
750/// Attempt to sort the pointers in \p VL and return the sorted indices
751/// in \p SortedIndices, if reordering is required.
752///
753/// Returns 'true' if sorting is legal, otherwise returns 'false'.
754///
755/// For example, for a given \p VL of memory accesses in program order, a[i+4],
756/// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
757/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
758/// saves the mask for actual memory accesses in program order in
759/// \p SortedIndices as <1,2,0,3>
760bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
761 ScalarEvolution &SE,
762 SmallVectorImpl<unsigned> &SortedIndices);
763
764/// Returns true if the memory operations \p A and \p B are consecutive.
765/// This is a simple API that does not depend on the analysis pass.
766bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
767 ScalarEvolution &SE, bool CheckType = true);
768
770 /// The cache.
772
773 // The used analysis passes.
774 ScalarEvolution &SE;
775 AAResults &AA;
776 DominatorTree &DT;
777 LoopInfo &LI;
778 const TargetLibraryInfo *TLI = nullptr;
779
780public:
782 LoopInfo &LI, const TargetLibraryInfo *TLI)
783 : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI) {}
784
785 const LoopAccessInfo &getInfo(Loop &L);
786
787 void clear() { LoopAccessInfoMap.clear(); }
788};
789
790/// This analysis provides dependence information for the memory accesses
791/// of a loop.
792///
793/// It runs the analysis for a loop on demand. This can be initiated by
794/// querying the loop access info via LAA::getInfo. getInfo returns a
795/// LoopAccessInfo object. See this class for the specifics of what information
796/// is provided.
798public:
799 static char ID;
800
802
803 bool runOnFunction(Function &F) override;
804
805 void getAnalysisUsage(AnalysisUsage &AU) const override;
806
807 /// Return the proxy object for retrieving LoopAccessInfo for individual
808 /// loops.
809 ///
810 /// If there is no cached result available run the analysis.
811 LoopAccessInfoManager &getLAIs() { return *LAIs; }
812
813 void releaseMemory() override {
814 // Invalidate the cache when the pass is freed.
815 LAIs->clear();
816 }
817
818private:
819 std::unique_ptr<LoopAccessInfoManager> LAIs;
820};
821
822/// This analysis provides dependence information for the memory
823/// accesses of a loop.
824///
825/// It runs the analysis for a loop on demand. This can be initiated by
826/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
827/// getResult returns a LoopAccessInfo object. See this class for the
828/// specifics of what information is provided.
830 : public AnalysisInfoMixin<LoopAccessAnalysis> {
832 static AnalysisKey Key;
833
834public:
836
838};
839
841 const LoopAccessInfo &LAI) const {
843}
844
846 const LoopAccessInfo &LAI) const {
847 return LAI.getDepChecker().getMemoryInstructions()[Destination];
848}
849
850} // End llvm namespace
851
852#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MapVector< const Value *, unsigned > OrderMap
Definition: AsmWriter.cpp:98
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
This header provides classes for managing per-loop analyses.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
Represent the analysis usage information of a pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
An instruction for reading from memory.
Definition: Instructions.h:177
This analysis provides dependence information for the memory accesses of a loop.
LoopAccessInfoManager Result
Result run(Function &F, FunctionAnalysisManager &AM)
const LoopAccessInfo & getInfo(Loop &L)
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, const TargetLibraryInfo *TLI)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
bool hasDependenceInvolvingLoopInvariantAddress() const
If the loop has memory dependence involving an invariant address, i.e.
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
unsigned getNumLoads() const
unsigned getNumRuntimePointerChecks() const
Number of memchecks required to prove independence of otherwise may-alias pointers.
bool hasStride(Value *V) const
Pointer has a symbolic stride.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
uint64_t getMaxSafeDepDistBytes() const
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
unsigned getNumStores() const
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
const ValueToValueMap & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
This analysis provides dependence information for the memory accesses of a loop.
LoopAccessInfoManager & getLAIs()
Return the proxy object for retrieving LoopAccessInfo for individual loops.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
void releaseMemory() override
releaseMemory() - This member can be implemented by a pass if it wants to be able to release its memo...
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
Checks memory dependences among accesses to the same underlying object to determine whether vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
uint64_t getMaxSafeDepDistBytes()
The maximum number of bytes of a vector register we can vectorize the accesses safely with.
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
SmallVector< MemAccessInfo, 8 > MemAccessInfoList
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
bool shouldRetryWithRuntimeCheck() const
In some cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...
void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
PointerIntPair< Value *, 1, bool > MemAccessInfo
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order.
Diagnostic information for optimization analysis remarks.
PointerIntPair - This class implements a pair of a pointer and small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list of run-time memory checks necessary.
std::optional< ArrayRef< PointerDiffInfo > > getDiffChecks() const
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
bool empty() const
No run-time memory checking is necessary.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
ScalarEvolution * getSE() const
void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Provides information about what library functions are available for the current target.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::optional< int > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
DenseMap< const Value *, Value * > ValueToValueMap
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Value * stripIntegerCast(Value *V)
bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
#define N
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:394
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: PassManager.h:69
Dependence between memory access instructions.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
Dependence(unsigned Source, unsigned Destination, DepType Type)
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
bool isForward() const
Lexically forward dependence.
bool isBackward() const
Lexically backward dependence.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Instruction * getDestination(const LoopAccessInfo &LAI) const
Return the destination instruction of the dependence.
Instruction * getSource(const LoopAccessInfo &LAI) const
Return the source instruction of the dependence.
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
static const char * DepName[]
String version of the types.
PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart, unsigned AccessSize, bool NeedsFreeze)
unsigned AddressSpace
Address space of the involved pointers.
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End, bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId, const SCEV *Expr, bool NeedsFreeze)
const SCEV * Start
Holds the smallest byte address accessed by the pointer throughout all iterations of the loop.
const SCEV * Expr
SCEV for the access.
bool NeedsFreeze
True if the pointer expressions needs to be frozen after expansion.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
const SCEV * End
Holds the largest byte address accessed by the pointer throughout all iterations of the loop,...
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static unsigned VectorizationFactor
VF as overridden by the user.
static unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime do not make more than this number of compari...
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.