LLVM 20.0.0git
LoopAccessAnalysis.h
Go to the documentation of this file.
1//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface for the loop memory dependence framework that
10// was originally developed for the Loop Vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
15#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
16
21#include <optional>
22#include <variant>
23
24namespace llvm {
25
26class AAResults;
27class DataLayout;
28class Loop;
29class LoopAccessInfo;
30class raw_ostream;
31class SCEV;
32class SCEVUnionPredicate;
33class Value;
34
35/// Collection of parameters shared between the Loop Vectorizer and the
36/// Loop Access Analysis.
38 /// Maximum SIMD width.
39 static const unsigned MaxVectorWidth;
40
41 /// VF as overridden by the user.
42 static unsigned VectorizationFactor;
43 /// Interleave factor as overridden by the user.
44 static unsigned VectorizationInterleave;
45 /// True if force-vector-interleave was specified by the user.
46 static bool isInterleaveForced();
47
48 /// When performing memory disambiguation checks at runtime do not
49 /// make more than this number of comparisons.
51
52 // When creating runtime checks for nested loops, where possible try to
53 // write the checks in a form that allows them to be easily hoisted out of
54 // the outermost loop. For example, we can do this by expanding the range of
55 // addresses considered to include the entire nested loop so that they are
56 // loop invariant.
57 static bool HoistRuntimeChecks;
58};
59
60/// Checks memory dependences among accesses to the same underlying
61/// object to determine whether vectorization is legal or not (and at
62/// which vectorization factor).
63///
64/// Note: This class will compute a conservative dependence for access to
65/// different underlying pointers. Clients, such as the loop vectorizer, will
66/// sometimes deal with these potential dependencies by emitting runtime checks.
67///
68/// We use the ScalarEvolution framework to symbolically evaluate access
69/// function pairs. Since we currently don't restructure the loop we can rely
70/// on the program order of memory accesses to determine their safety.
71/// At the moment we will only deem accesses as safe for:
72/// * A negative constant distance assuming program order.
73///
74/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
75/// a[i] = tmp; y = a[i];
76///
77/// The latter case is safe because later checks guarantee that there can't
78/// be a cycle through a phi node (that is, we check that "x" and "y" are not
79/// the same variable: a header phi can only be an induction or a reduction, a
80/// reduction can't have a memory sink, an induction can't have a memory
81/// source). This is important and must not be violated (or we have to
82/// resort to checking for cycles through memory).
83///
84/// * A positive constant distance assuming program order that is bigger
85/// than the biggest memory access.
86///
87/// tmp = a[i] OR b[i] = x
88/// a[i+2] = tmp y = b[i+2];
89///
90/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
91///
92/// * Zero distances and all accesses have the same size.
93///
95public:
98 /// Set of potential dependent memory accesses.
100
101 /// Type to keep track of the status of the dependence check. The order of
102 /// the elements is important and has to be from most permissive to least
103 /// permissive.
105 // Can vectorize safely without RT checks. All dependences are known to be
106 // safe.
107 Safe,
108 // Can possibly vectorize with RT checks to overcome unknown dependencies.
110 // Cannot vectorize due to known unsafe dependencies.
111 Unsafe,
112 };
113
114 /// Dependence between memory access instructions.
115 struct Dependence {
116 /// The type of the dependence.
117 enum DepType {
118 // No dependence.
120 // We couldn't determine the direction or the distance.
122 // At least one of the memory access instructions may access a loop
123 // varying object, e.g. the address of underlying object is loaded inside
124 // the loop, like A[B[i]]. We cannot determine direction or distance in
125 // those cases, and also are unable to generate any runtime checks.
127
128 // Lexically forward.
129 //
130 // FIXME: If we only have loop-independent forward dependences (e.g. a
131 // read and write of A[i]), LAA will locally deem the dependence "safe"
132 // without querying the MemoryDepChecker. Therefore we can miss
133 // enumerating loop-independent forward dependences in
134 // getDependences. Note that as soon as there are different
135 // indices used to access the same array, the MemoryDepChecker *is*
136 // queried and the dependence list is complete.
138 // Forward, but if vectorized, is likely to prevent store-to-load
139 // forwarding.
141 // Lexically backward.
143 // Backward, but the distance allows a vectorization factor dependent
144 // on MinDepDistBytes.
146 // Same, but may prevent store-to-load forwarding.
148 };
149
150 /// String version of the types.
151 static const char *DepName[];
152
153 /// Index of the source of the dependence in the InstMap vector.
154 unsigned Source;
155 /// Index of the destination of the dependence in the InstMap vector.
156 unsigned Destination;
157 /// The type of the dependence.
159
162
163 /// Return the source instruction of the dependence.
164 Instruction *getSource(const MemoryDepChecker &DepChecker) const;
165 /// Return the destination instruction of the dependence.
166 Instruction *getDestination(const MemoryDepChecker &DepChecker) const;
167
168 /// Dependence types that don't prevent vectorization.
170
171 /// Lexically forward dependence.
172 bool isForward() const;
173 /// Lexically backward dependence.
174 bool isBackward() const;
175
176 /// May be a lexically backward dependence type (includes Unknown).
177 bool isPossiblyBackward() const;
178
179 /// Print the dependence. \p Instr is used to map the instruction
180 /// indices to instructions.
181 void print(raw_ostream &OS, unsigned Depth,
182 const SmallVectorImpl<Instruction *> &Instrs) const;
183 };
184
186 const DenseMap<Value *, const SCEV *> &SymbolicStrides,
187 unsigned MaxTargetVectorWidthInBits)
188 : PSE(PSE), InnermostLoop(L), SymbolicStrides(SymbolicStrides),
189 MaxTargetVectorWidthInBits(MaxTargetVectorWidthInBits) {}
190
191 /// Register the location (instructions are given increasing numbers)
192 /// of a write access.
193 void addAccess(StoreInst *SI);
194
195 /// Register the location (instructions are given increasing numbers)
196 /// of a read access.
197 void addAccess(LoadInst *LI);
198
199 /// Check whether the dependencies between the accesses are safe.
200 ///
201 /// Only checks sets with elements in \p CheckDeps.
202 bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
204 &UnderlyingObjects);
205
206 /// No memory dependence was encountered that would inhibit
207 /// vectorization.
210 }
211
212 /// Return true if the number of elements that are safe to operate on
213 /// simultaneously is not bounded.
215 return MaxSafeVectorWidthInBits == UINT_MAX;
216 }
217
218 /// Return the number of elements that are safe to operate on
219 /// simultaneously, multiplied by the size of the element in bits.
221 return MaxSafeVectorWidthInBits;
222 }
223
224 /// In some cases when the dependency check fails we can still
225 /// vectorize the loop with a dynamic array access check.
227 return FoundNonConstantDistanceDependence &&
229 }
230
231 /// Returns the memory dependences. If null is returned we exceeded
232 /// the MaxDependences threshold and this information is not
233 /// available.
235 return RecordDependences ? &Dependences : nullptr;
236 }
237
238 void clearDependences() { Dependences.clear(); }
239
240 /// The vector of memory access instructions. The indices are used as
241 /// instruction identifiers in the Dependence class.
243 return InstMap;
244 }
245
246 /// Generate a mapping between the memory instructions and their
247 /// indices according to program order.
250
251 for (unsigned I = 0; I < InstMap.size(); ++I)
252 OrderMap[InstMap[I]] = I;
253
254 return OrderMap;
255 }
256
257 /// Find the set of instructions that read or write via \p Ptr.
259 bool isWrite) const;
260
261 /// Return the program order indices for the access location (Ptr, IsWrite).
262 /// Returns an empty ArrayRef if there are no accesses for the location.
264 auto I = Accesses.find({Ptr, IsWrite});
265 if (I != Accesses.end())
266 return I->second;
267 return {};
268 }
269
270 const Loop *getInnermostLoop() const { return InnermostLoop; }
271
273 std::pair<const SCEV *, const SCEV *>> &
275 return PointerBounds;
276 }
277
278private:
279 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
280 /// applies dynamic knowledge to simplify SCEV expressions and convert them
281 /// to a more usable form. We need this in case assumptions about SCEV
282 /// expressions need to be made in order to avoid unknown dependences. For
283 /// example we might assume a unit stride for a pointer in order to prove
284 /// that a memory access is strided and doesn't wrap.
286 const Loop *InnermostLoop;
287
288 /// Reference to map of pointer values to
289 /// their stride symbols, if they have a symbolic stride.
290 const DenseMap<Value *, const SCEV *> &SymbolicStrides;
291
292 /// Maps access locations (ptr, read/write) to program order.
294
295 /// Memory access instructions in program order.
297
298 /// The program order index to be used for the next instruction.
299 unsigned AccessIdx = 0;
300
301 /// The smallest dependence distance in bytes in the loop. This may not be
302 /// the same as the maximum number of bytes that are safe to operate on
303 /// simultaneously.
304 uint64_t MinDepDistBytes = 0;
305
306 /// Number of elements (from consecutive iterations) that are safe to
307 /// operate on simultaneously, multiplied by the size of the element in bits.
308 /// The size of the element is taken from the memory access that is most
309 /// restrictive.
310 uint64_t MaxSafeVectorWidthInBits = -1U;
311
312 /// If we see a non-constant dependence distance we can still try to
313 /// vectorize this loop with runtime checks.
314 bool FoundNonConstantDistanceDependence = false;
315
316 /// Result of the dependence checks, indicating whether the checked
317 /// dependences are safe for vectorization, require RT checks or are known to
318 /// be unsafe.
320
321 //// True if Dependences reflects the dependences in the
322 //// loop. If false we exceeded MaxDependences and
323 //// Dependences is invalid.
324 bool RecordDependences = true;
325
326 /// Memory dependences collected during the analysis. Only valid if
327 /// RecordDependences is true.
328 SmallVector<Dependence, 8> Dependences;
329
330 /// The maximum width of a target's vector registers multiplied by 2 to also
331 /// roughly account for additional interleaving. Is used to decide if a
332 /// backwards dependence with non-constant stride should be classified as
333 /// backwards-vectorizable or unknown (triggering a runtime check).
334 unsigned MaxTargetVectorWidthInBits = 0;
335
336 /// Mapping of SCEV expressions to their expanded pointer bounds (pair of
337 /// start and end pointer expressions).
339 std::pair<const SCEV *, const SCEV *>>
341
342 /// Check whether there is a plausible dependence between the two
343 /// accesses.
344 ///
345 /// Access \p A must happen before \p B in program order. The two indices
346 /// identify the index into the program order map.
347 ///
348 /// This function checks whether there is a plausible dependence (or the
349 /// absence of such can't be proved) between the two accesses. If there is a
350 /// plausible dependence but the dependence distance is bigger than one
351 /// element access it records this distance in \p MinDepDistBytes (if this
352 /// distance is smaller than any other distance encountered so far).
353 /// Otherwise, this function returns true signaling a possible dependence.
355 isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
356 unsigned BIdx,
358 &UnderlyingObjects);
359
360 /// Check whether the data dependence could prevent store-load
361 /// forwarding.
362 ///
363 /// \return false if we shouldn't vectorize at all or avoid larger
364 /// vectorization factors by limiting MinDepDistBytes.
365 bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
366
367 /// Updates the current safety status with \p S. We can go from Safe to
368 /// either PossiblySafeWithRtChecks or Unsafe and from
369 /// PossiblySafeWithRtChecks to Unsafe.
370 void mergeInStatus(VectorizationSafetyStatus S);
371
372 struct DepDistanceStrideAndSizeInfo {
373 const SCEV *Dist;
374 uint64_t StrideA;
375 uint64_t StrideB;
376 uint64_t TypeByteSize;
377 bool AIsWrite;
378 bool BIsWrite;
379
380 DepDistanceStrideAndSizeInfo(const SCEV *Dist, uint64_t StrideA,
381 uint64_t StrideB, uint64_t TypeByteSize,
382 bool AIsWrite, bool BIsWrite)
383 : Dist(Dist), StrideA(StrideA), StrideB(StrideB),
384 TypeByteSize(TypeByteSize), AIsWrite(AIsWrite), BIsWrite(BIsWrite) {}
385 };
386
387 /// Get the dependence distance, strides, type size and whether it is a write
388 /// for the dependence between A and B. Returns a DepType, if we can prove
389 /// there's no dependence or the analysis fails. Outlined to lambda to limit
390 /// the scope of various temporary variables, like A/BPtr, StrideA/BPtr and
391 /// others. Returns either the dependence result, if it could already be
392 /// determined, or a struct containing (Distance, Stride, TypeSize, AIsWrite,
393 /// BIsWrite).
394 std::variant<Dependence::DepType, DepDistanceStrideAndSizeInfo>
395 getDependenceDistanceStrideAndSize(
396 const MemAccessInfo &A, Instruction *AInst, const MemAccessInfo &B,
397 Instruction *BInst,
398 const DenseMap<Value *, SmallVector<const Value *, 16>>
399 &UnderlyingObjects);
400};
401
402class RuntimePointerChecking;
403/// A grouping of pointers. A single memcheck is required between
404/// two groups.
406 /// Create a new pointer checking group containing a single
407 /// pointer, with index \p Index in RtCheck.
409
410 /// Tries to add the pointer recorded in RtCheck at index
411 /// \p Index to this pointer checking group. We can only add a pointer
412 /// to a checking group if we will still be able to get
413 /// the upper and lower bounds of the check. Returns true in case
414 /// of success, false otherwise.
415 bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
416 bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
417 unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
418
419 /// The SCEV expression which represents the upper bound of all the
420 /// pointers in this group.
421 const SCEV *High;
422 /// The SCEV expression which represents the lower bound of all the
423 /// pointers in this group.
424 const SCEV *Low;
425 /// Indices of all the pointers that constitute this grouping.
427 /// Address space of the involved pointers.
428 unsigned AddressSpace;
429 /// Whether the pointer needs to be frozen after expansion, e.g. because it
430 /// may be poison outside the loop.
431 bool NeedsFreeze = false;
432};
433
434/// A memcheck which is made up of a pair of grouped pointers.
435typedef std::pair<const RuntimeCheckingPtrGroup *,
438
442 unsigned AccessSize;
444
446 unsigned AccessSize, bool NeedsFreeze)
449};
450
451/// Holds information about the memory runtime legality checks to verify
452/// that a group of pointers do not overlap.
455
456public:
457 struct PointerInfo {
458 /// Holds the pointer value that we need to check.
460 /// Holds the smallest byte address accessed by the pointer throughout all
461 /// iterations of the loop.
462 const SCEV *Start;
463 /// Holds the largest byte address accessed by the pointer throughout all
464 /// iterations of the loop, plus 1.
465 const SCEV *End;
466 /// Holds the information if this pointer is used for writing to memory.
468 /// Holds the id of the set of pointers that could be dependent because of a
469 /// shared underlying object.
471 /// Holds the id of the disjoint alias set to which this pointer belongs.
472 unsigned AliasSetId;
473 /// SCEV for the access.
474 const SCEV *Expr;
475 /// True if the pointer expressions needs to be frozen after expansion.
477
479 bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
480 const SCEV *Expr, bool NeedsFreeze)
484 };
485
487 : DC(DC), SE(SE) {}
488
489 /// Reset the state of the pointer runtime information.
490 void reset() {
491 Need = false;
492 Pointers.clear();
493 Checks.clear();
494 }
495
496 /// Insert a pointer and calculate the start and end SCEVs.
497 /// We need \p PSE in order to compute the SCEV expression of the pointer
498 /// according to the assumptions that we've made during the analysis.
499 /// The method might also version the pointer stride according to \p Strides,
500 /// and add new predicates to \p PSE.
501 void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
502 bool WritePtr, unsigned DepSetId, unsigned ASId,
503 PredicatedScalarEvolution &PSE, bool NeedsFreeze);
504
505 /// No run-time memory checking is necessary.
506 bool empty() const { return Pointers.empty(); }
507
508 /// Generate the checks and store it. This also performs the grouping
509 /// of pointers to reduce the number of memchecks necessary.
510 void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
511 bool UseDependencies);
512
513 /// Returns the checks that generateChecks created. They can be used to ensure
514 /// no read/write accesses overlap across all loop iterations.
516 return Checks;
517 }
518
519 // Returns an optional list of (pointer-difference expressions, access size)
520 // pairs that can be used to prove that there are no vectorization-preventing
521 // dependencies at runtime. There is a vectorization-preventing dependency
522 // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
523 // std::nullopt if pointer-difference checks cannot be used.
524 std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
525 if (!CanUseDiffCheck)
526 return std::nullopt;
527 return {DiffChecks};
528 }
529
530 /// Decide if we need to add a check between two groups of pointers,
531 /// according to needsChecking.
533 const RuntimeCheckingPtrGroup &N) const;
534
535 /// Returns the number of run-time checks required according to
536 /// needsChecking.
537 unsigned getNumberOfChecks() const { return Checks.size(); }
538
539 /// Print the list of run-time memory checks necessary.
540 void print(raw_ostream &OS, unsigned Depth = 0) const;
541
542 /// Print \p Checks.
545 unsigned Depth = 0) const;
546
547 /// This flag indicates if we need to add the runtime check.
548 bool Need = false;
549
550 /// Information about the pointers that may require checking.
552
553 /// Holds a partitioning of pointers into "check groups".
555
556 /// Check if pointers are in the same partition
557 ///
558 /// \p PtrToPartition contains the partition number for pointers (-1 if the
559 /// pointer belongs to multiple partitions).
560 static bool
562 unsigned PtrIdx1, unsigned PtrIdx2);
563
564 /// Decide whether we need to issue a run-time check for pointer at
565 /// index \p I and \p J to prove their independence.
566 bool needsChecking(unsigned I, unsigned J) const;
567
568 /// Return PointerInfo for pointer at index \p PtrIdx.
569 const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
570 return Pointers[PtrIdx];
571 }
572
573 ScalarEvolution *getSE() const { return SE; }
574
575private:
576 /// Groups pointers such that a single memcheck is required
577 /// between two different groups. This will clear the CheckingGroups vector
578 /// and re-compute it. We will only group dependencies if \p UseDependencies
579 /// is true, otherwise we will create a separate group for each pointer.
580 void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
581 bool UseDependencies);
582
583 /// Generate the checks and return them.
585
586 /// Try to add a new (pointer-difference, access size) pair to
587 /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
588 /// checks cannot be used for the groups, set CanUseDiffCheck to false.
589 bool tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
590 const RuntimeCheckingPtrGroup &CGJ);
591
593
594 /// Holds a pointer to the ScalarEvolution analysis.
595 ScalarEvolution *SE;
596
597 /// Set of run-time checks required to establish independence of
598 /// otherwise may-aliasing pointers in the loop.
600
601 /// Flag indicating if pointer-difference checks can be used
602 bool CanUseDiffCheck = true;
603
604 /// A list of (pointer-difference, access size) pairs that can be used to
605 /// prove that there are no vectorization-preventing dependencies.
607};
608
609/// Drive the analysis of memory accesses in the loop
610///
611/// This class is responsible for analyzing the memory accesses of a loop. It
612/// collects the accesses and then its main helper the AccessAnalysis class
613/// finds and categorizes the dependences in buildDependenceSets.
614///
615/// For memory dependences that can be analyzed at compile time, it determines
616/// whether the dependence is part of cycle inhibiting vectorization. This work
617/// is delegated to the MemoryDepChecker class.
618///
619/// For memory dependences that cannot be determined at compile time, it
620/// generates run-time checks to prove independence. This is done by
621/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
622/// RuntimePointerCheck class.
623///
624/// If pointers can wrap or can't be expressed as affine AddRec expressions by
625/// ScalarEvolution, we will generate run-time checks by emitting a
626/// SCEVUnionPredicate.
627///
628/// Checks for both memory dependences and the SCEV predicates contained in the
629/// PSE must be emitted in order for the results of this analysis to be valid.
631public:
633 const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT,
634 LoopInfo *LI);
635
636 /// Return true if we can analyze the memory accesses in the loop and there are
637 /// no memory dependence cycles. Note that for dependences between loads &
638 /// stores with uniform addresses,
639 /// hasStoreStoreDependenceInvolvingLoopInvariantAddress and
640 /// hasLoadStoreDependenceInvolvingLoopInvariantAddress also need to be
641 /// checked.
642 bool canVectorizeMemory() const { return CanVecMem; }
643
644 /// Return true if there is a convergent operation in the loop. There may
645 /// still be reported runtime pointer checks that would be required, but it is
646 /// not legal to insert them.
647 bool hasConvergentOp() const { return HasConvergentOp; }
648
650 return PtrRtChecking.get();
651 }
652
653 /// Number of memchecks required to prove independence of otherwise
654 /// may-alias pointers.
655 unsigned getNumRuntimePointerChecks() const {
656 return PtrRtChecking->getNumberOfChecks();
657 }
658
659 /// Return true if the block BB needs to be predicated in order for the loop
660 /// to be vectorized.
661 static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
662 DominatorTree *DT);
663
664 /// Returns true if value \p V is loop invariant.
665 bool isInvariant(Value *V) const;
666
667 unsigned getNumStores() const { return NumStores; }
668 unsigned getNumLoads() const { return NumLoads;}
669
670 /// The diagnostics report generated for the analysis. E.g. why we
671 /// couldn't analyze the loop.
672 const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
673
674 /// the Memory Dependence Checker which can determine the
675 /// loop-independent and loop-carried dependences between memory accesses.
676 const MemoryDepChecker &getDepChecker() const { return *DepChecker; }
677
678 /// Return the list of instructions that use \p Ptr to read or write
679 /// memory.
681 bool isWrite) const {
682 return DepChecker->getInstructionsForAccess(Ptr, isWrite);
683 }
684
685 /// If an access has a symbolic stride, this maps the pointer value to
686 /// the stride symbol.
688 return SymbolicStrides;
689 }
690
691 /// Print the information about the memory accesses in the loop.
692 void print(raw_ostream &OS, unsigned Depth = 0) const;
693
694 /// Return true if the loop has memory dependence involving two stores to an
695 /// invariant address, else return false.
697 return HasStoreStoreDependenceInvolvingLoopInvariantAddress;
698 }
699
700 /// Return true if the loop has memory dependence involving a load and a store
701 /// to an invariant address, else return false.
703 return HasLoadStoreDependenceInvolvingLoopInvariantAddress;
704 }
705
706 /// Return the list of stores to invariant addresses.
708 return StoresToInvariantAddresses;
709 }
710
711 /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
712 /// them to a more usable form. All SCEV expressions during the analysis
713 /// should be re-written (and therefore simplified) according to PSE.
714 /// A user of LoopAccessAnalysis will need to emit the runtime checks
715 /// associated with this predicate.
716 const PredicatedScalarEvolution &getPSE() const { return *PSE; }
717
718private:
719 /// Analyze the loop. Returns true if all memory access in the loop can be
720 /// vectorized.
721 bool analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI,
722 DominatorTree *DT);
723
724 /// Check if the structure of the loop allows it to be analyzed by this
725 /// pass.
726 bool canAnalyzeLoop();
727
728 /// Save the analysis remark.
729 ///
730 /// LAA does not directly emit the remarks. Instead it stores them so the
731 /// client can retrieve and present them as its own analysis
732 /// (e.g. -Rpass-analysis=loop-vectorize).
733 OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
734 Instruction *Instr = nullptr);
735
736 /// Collect memory access with loop invariant strides.
737 ///
738 /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
739 /// invariant.
740 void collectStridedAccess(Value *LoadOrStoreInst);
741
742 // Emits the first unsafe memory dependence in a loop.
743 // Emits nothing if there are no unsafe dependences
744 // or if the dependences were not recorded.
745 void emitUnsafeDependenceRemark();
746
747 std::unique_ptr<PredicatedScalarEvolution> PSE;
748
749 /// We need to check that all of the pointers in this list are disjoint
750 /// at runtime. Using std::unique_ptr to make using move ctor simpler.
751 std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
752
753 /// the Memory Dependence Checker which can determine the
754 /// loop-independent and loop-carried dependences between memory accesses.
755 std::unique_ptr<MemoryDepChecker> DepChecker;
756
757 Loop *TheLoop;
758
759 unsigned NumLoads = 0;
760 unsigned NumStores = 0;
761
762 /// Cache the result of analyzeLoop.
763 bool CanVecMem = false;
764 bool HasConvergentOp = false;
765
766 /// Indicator that there are two non vectorizable stores to the same uniform
767 /// address.
768 bool HasStoreStoreDependenceInvolvingLoopInvariantAddress = false;
769 /// Indicator that there is non vectorizable load and store to the same
770 /// uniform address.
771 bool HasLoadStoreDependenceInvolvingLoopInvariantAddress = false;
772
773 /// List of stores to invariant addresses.
774 SmallVector<StoreInst *> StoresToInvariantAddresses;
775
776 /// The diagnostics report generated for the analysis. E.g. why we
777 /// couldn't analyze the loop.
778 std::unique_ptr<OptimizationRemarkAnalysis> Report;
779
780 /// If an access has a symbolic stride, this maps the pointer value to
781 /// the stride symbol.
782 DenseMap<Value *, const SCEV *> SymbolicStrides;
783};
784
785/// Return the SCEV corresponding to a pointer with the symbolic stride
786/// replaced with constant one, assuming the SCEV predicate associated with
787/// \p PSE is true.
788///
789/// If necessary this method will version the stride of the pointer according
790/// to \p PtrToStride and therefore add further predicates to \p PSE.
791///
792/// \p PtrToStride provides the mapping between the pointer value and its
793/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
794const SCEV *
795replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
796 const DenseMap<Value *, const SCEV *> &PtrToStride,
797 Value *Ptr);
798
799/// If the pointer has a constant stride return it in units of the access type
800/// size. Otherwise return std::nullopt.
801///
802/// Ensure that it does not wrap in the address space, assuming the predicate
803/// associated with \p PSE is true.
804///
805/// If necessary this method will version the stride of the pointer according
806/// to \p PtrToStride and therefore add further predicates to \p PSE.
807/// The \p Assume parameter indicates if we are allowed to make additional
808/// run-time assumptions.
809///
810/// Note that the analysis results are defined if-and-only-if the original
811/// memory access was defined. If that access was dead, or UB, then the
812/// result of this function is undefined.
813std::optional<int64_t>
814getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
815 const Loop *Lp,
816 const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(),
817 bool Assume = false, bool ShouldCheckWrap = true);
818
819/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
820/// compatible and it is possible to calculate the distance between them. This
821/// is a simple API that does not depend on the analysis pass.
822/// \param StrictCheck Ensure that the calculated distance matches the
823/// type-based one after all the bitcasts removal in the provided pointers.
824std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
825 Value *PtrB, const DataLayout &DL,
826 ScalarEvolution &SE,
827 bool StrictCheck = false,
828 bool CheckType = true);
829
830/// Attempt to sort the pointers in \p VL and return the sorted indices
831/// in \p SortedIndices, if reordering is required.
832///
833/// Returns 'true' if sorting is legal, otherwise returns 'false'.
834///
835/// For example, for a given \p VL of memory accesses in program order, a[i+4],
836/// a[i+0], a[i+1] and a[i+7], this function will sort the \p VL and save the
837/// sorted indices in \p SortedIndices as a[i+0], a[i+1], a[i+4], a[i+7] and
838/// saves the mask for actual memory accesses in program order in
839/// \p SortedIndices as <1,2,0,3>
840bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
841 ScalarEvolution &SE,
842 SmallVectorImpl<unsigned> &SortedIndices);
843
844/// Returns true if the memory operations \p A and \p B are consecutive.
845/// This is a simple API that does not depend on the analysis pass.
846bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
847 ScalarEvolution &SE, bool CheckType = true);
848
850 /// The cache.
852
853 // The used analysis passes.
854 ScalarEvolution &SE;
855 AAResults &AA;
856 DominatorTree &DT;
857 LoopInfo &LI;
859 const TargetLibraryInfo *TLI = nullptr;
860
861public:
864 const TargetLibraryInfo *TLI)
865 : SE(SE), AA(AA), DT(DT), LI(LI), TTI(TTI), TLI(TLI) {}
866
867 const LoopAccessInfo &getInfo(Loop &L);
868
869 void clear();
870
871 bool invalidate(Function &F, const PreservedAnalyses &PA,
873};
874
875/// This analysis provides dependence information for the memory
876/// accesses of a loop.
877///
878/// It runs the analysis for a loop on demand. This can be initiated by
879/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
880/// getResult return a LoopAccessInfo object. See this class for the
881/// specifics of what information is provided.
883 : public AnalysisInfoMixin<LoopAccessAnalysis> {
885 static AnalysisKey Key;
886
887public:
889
891};
892
894 const MemoryDepChecker &DepChecker) const {
895 return DepChecker.getMemoryInstructions()[Source];
896}
897
899 const MemoryDepChecker &DepChecker) const {
900 return DepChecker.getMemoryInstructions()[Destination];
901}
902
903} // End llvm namespace
904
905#endif
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MapVector< const Value *, unsigned > OrderMap
Definition: AsmWriter.cpp:99
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
bool End
Definition: ELF_riscv.cpp:480
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
This header provides classes for managing per-loop analyses.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
raw_pwrite_stream & OS
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(MVT::SimpleValueType VT, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
API to communicate dependencies between analyses during invalidation.
Definition: PassManager.h:292
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
An instruction for reading from memory.
Definition: Instructions.h:174
This analysis provides dependence information for the memory accesses of a loop.
LoopAccessInfoManager Result
Result run(Function &F, FunctionAnalysisManager &AM)
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
const LoopAccessInfo & getInfo(Loop &L)
LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, LoopInfo &LI, TargetTransformInfo *TTI, const TargetLibraryInfo *TLI)
Drive the analysis of memory accesses in the loop.
const MemoryDepChecker & getDepChecker() const
the Memory Dependence Checker which can determine the loop-independent and loop-carried dependences b...
ArrayRef< StoreInst * > getStoresToInvariantAddresses() const
Return the list of stores to invariant addresses.
const OptimizationRemarkAnalysis * getReport() const
The diagnostics report generated for the analysis.
const RuntimePointerChecking * getRuntimePointerChecking() const
bool canVectorizeMemory() const
Return true we can analyze the memory accesses in the loop and there are no memory dependence cycles.
unsigned getNumLoads() const
unsigned getNumRuntimePointerChecks() const
Number of memchecks required to prove independence of otherwise may-alias pointers.
bool isInvariant(Value *V) const
Returns true if value V is loop invariant.
bool hasLoadStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving a load and a store to an invariant address,...
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
const PredicatedScalarEvolution & getPSE() const
Used to add runtime SCEV checks.
unsigned getNumStores() const
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Return the list of instructions that use Ptr to read or write memory.
const DenseMap< Value *, const SCEV * > & getSymbolicStrides() const
If an access has a symbolic strides, this maps the pointer value to the stride symbol.
bool hasStoreStoreDependenceInvolvingLoopInvariantAddress() const
Return true if the loop has memory dependence involving two stores to an invariant address,...
bool hasConvergentOp() const
Return true if there is a convergent operation in the loop.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const DenseMap< Value *, SmallVector< const Value *, 16 > > &UnderlyingObjects)
Check whether the dependencies between the accesses are safe.
bool isSafeForAnyVectorWidth() const
Return true if the number of elements that are safe to operate on simultaneously is not bounded.
EquivalenceClasses< MemAccessInfo > DepCandidates
Set of potential dependent memory accesses.
MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, const DenseMap< Value *, const SCEV * > &SymbolicStrides, unsigned MaxTargetVectorWidthInBits)
const SmallVectorImpl< Instruction * > & getMemoryInstructions() const
The vector of memory access instructions.
const Loop * getInnermostLoop() const
uint64_t getMaxSafeVectorWidthInBits() const
Return the number of elements that are safe to operate on simultaneously, multiplied by the size of t...
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
const SmallVectorImpl< Dependence > * getDependences() const
Returns the memory dependences.
DenseMap< std::pair< const SCEV *, Type * >, std::pair< const SCEV *, const SCEV * > > & getPointerBounds()
SmallVector< MemAccessInfo, 8 > MemAccessInfoList
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
bool shouldRetryWithRuntimeCheck() const
In some cases when the dependency check fails we can still vectorize the loop with a dynamic array ac...
void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
PointerIntPair< Value *, 1, bool > MemAccessInfo
DenseMap< Instruction *, unsigned > generateInstructionOrderMap() const
Generate a mapping between the memory instructions and their indices according to program order.
Diagnostic information for optimization analysis remarks.
PointerIntPair - This class implements a pair of a pointer and small integer.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
std::optional< ArrayRef< PointerDiffInfo > > getDiffChecks() const
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
bool empty() const
No run-time memory checking is necessary.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
ScalarEvolution * getSE() const
void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
const SmallVectorImpl< RuntimePointerCheck > & getChecks() const
Returns the checks that generateChecks created.
const PointerInfo & getPointerInfo(unsigned PtrIdx) const
Return PointerInfo for pointer at index PtrIdx.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Value handle that tracks a Value across RAUW.
Definition: ValueHandle.h:331
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
std::optional< int > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
std::pair< const RuntimeCheckingPtrGroup *, const RuntimeCheckingPtrGroup * > RuntimePointerCheck
A memcheck which made up of a pair of grouped pointers.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
#define N
IR Values for the lower and upper bounds of a pointer evolution.
Definition: LoopUtils.cpp:1798
A CRTP mix-in that provides informational APIs needed for analysis passes.
Definition: PassManager.h:92
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:28
Dependence between memory access instructions.
Instruction * getDestination(const MemoryDepChecker &DepChecker) const
Return the destination instruction of the dependence.
DepType Type
The type of the dependence.
unsigned Destination
Index of the destination of the dependence in the InstMap vector.
Dependence(unsigned Source, unsigned Destination, DepType Type)
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
Instruction * getSource(const MemoryDepChecker &DepChecker) const
Return the source instruction of the dependence.
bool isForward() const
Lexically forward dependence.
bool isBackward() const
Lexically backward dependence.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
unsigned Source
Index of the source of the dependence in the InstMap vector.
DepType
The type of the dependence.
static const char * DepName[]
String version of the types.
PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart, unsigned AccessSize, bool NeedsFreeze)
unsigned AddressSpace
Address space of the involved pointers.
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End, bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId, const SCEV *Expr, bool NeedsFreeze)
const SCEV * Start
Holds the smallest byte address accessed by the pointer throughout all iterations of the loop.
const SCEV * Expr
SCEV for the access.
bool NeedsFreeze
True if the pointer expressions needs to be frozen after expansion.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
const SCEV * End
Holds the largest byte address accessed by the pointer throughout all iterations of the loop,...
TrackingVH< Value > PointerValue
Holds the pointer value that we need to check.
Collection of parameters shared between the Loop Vectorizer and the Loop Access Analysis.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static unsigned VectorizationFactor
VF as overridden by the user.
static unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime do not make more than this number of compari...
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.