LoopAccessAnalysis.h
1//===- llvm/Analysis/LoopAccessAnalysis.h -----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interface for the loop memory dependence framework that
10// was originally developed for the Loop Vectorizer.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
15#define LLVM_ANALYSIS_LOOPACCESSANALYSIS_H
16
17#include "llvm/ADT/EquivalenceClasses.h"
18#include "llvm/Analysis/LoopAnalysisManager.h"
19#include "llvm/Analysis/ScalarEvolutionExpressions.h"
20#include "llvm/IR/DiagnosticInfo.h"
21#include <optional>
22
23namespace llvm {
24
25class AAResults;
26class DataLayout;
27class Loop;
28class LoopAccessInfo;
29class raw_ostream;
30class SCEV;
31class SCEVUnionPredicate;
32class Value;
33
34/// Collection of parameters shared between the Loop Vectorizer and the
35/// Loop Access Analysis.
36struct VectorizerParams {
37 /// Maximum SIMD width.
38 static const unsigned MaxVectorWidth;
39
40 /// VF as overridden by the user.
41 static unsigned VectorizationFactor;
42 /// Interleave factor as overridden by the user.
43 static unsigned VectorizationInterleave;
44 /// True if force-vector-interleave was specified by the user.
45 static bool isInterleaveForced();
46
47 /// When performing memory disambiguation checks at runtime do not
48 /// make more than this number of comparisons.
49 static unsigned RuntimeMemoryCheckThreshold;
50
51 // When creating runtime checks for nested loops, where possible try to
52 // write the checks in a form that allows them to be easily hoisted out of
53 // the outermost loop. For example, we can do this by expanding the range of
54 // addresses considered to include the entire nested loop so that they are
55 // loop invariant.
56 static bool HoistRuntimeChecks;
57};
58
59/// Checks memory dependences among accesses to the same underlying
60/// object to determine whether vectorization is legal or not (and at
61/// which vectorization factor).
62///
63/// Note: This class will compute a conservative dependence for accesses to
64/// different underlying pointers. Clients, such as the loop vectorizer, will
65/// sometimes deal with these potential dependencies by emitting runtime checks.
66///
67/// We use the ScalarEvolution framework to symbolically evaluate access
68/// function pairs. Since we currently don't restructure the loop we can rely
69/// on the program order of memory accesses to determine their safety.
70/// At the moment we will only deem accesses as safe for:
71/// * A negative constant distance assuming program order.
72///
73/// Safe: tmp = a[i + 1]; OR a[i + 1] = x;
74/// a[i] = tmp; y = a[i];
75///
76/// The latter case is safe because later checks guarantee that there can't
77/// be a cycle through a phi node (that is, we check that "x" and "y" are not
78/// the same variable: a header phi can only be an induction or a reduction, a
79/// reduction can't have a memory sink, an induction can't have a memory
80/// source). This is important and must not be violated (or we have to
81/// resort to checking for cycles through memory).
82///
83/// * A positive constant distance assuming program order that is bigger
84/// than the biggest memory access.
85///
86/// tmp = a[i] OR b[i] = x
87/// a[i+2] = tmp y = b[i+2];
88///
89/// Safe distance: 2 x sizeof(a[0]), and 2 x sizeof(b[0]), respectively.
90///
91/// * Zero distances and all accesses have the same size.
92///
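/// Illustrative example (an editorial sketch, not part of the original
/// comment): in
///   for (int i = 0; i < n; ++i) {
///     int tmp = a[i];      // load
///     a[i + 2] = tmp + 1;  // store two elements ahead
///   }
/// the load/store pair has a positive constant distance of two elements, so
/// operating on at most two consecutive iterations at once is safe, matching
/// the "2 x sizeof(a[0])" safe distance described above.
///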
93class MemoryDepChecker {
94public:
95 typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
96 typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
97 /// Set of potential dependent memory accesses.
98 typedef EquivalenceClasses<MemAccessInfo> DepCandidates;
99
100 /// Type to keep track of the status of the dependence check. The order of
101 /// the elements is important and has to be from most permissive to least
102 /// permissive.
103 enum class VectorizationSafetyStatus {
104 // Can vectorize safely without RT checks. All dependences are known to be
105 // safe.
106 Safe,
107 // Can possibly vectorize with RT checks to overcome unknown dependencies.
108 PossiblySafeWithRtChecks,
109 // Cannot vectorize due to known unsafe dependencies.
110 Unsafe,
111 };
112
113 /// Dependence between memory access instructions.
114 struct Dependence {
115 /// The type of the dependence.
116 enum DepType {
117 // No dependence.
118 NoDep,
119 // We couldn't determine the direction or the distance.
120 Unknown,
121 // At least one of the memory access instructions may access a loop
122 // varying object, e.g. the address of the underlying object is loaded inside
123 // the loop, like A[B[i]]. We cannot determine direction or distance in
124 // those cases, and are also unable to generate any runtime checks.
125 IndirectUnsafe,
126
127 // Lexically forward.
128 //
129 // FIXME: If we only have loop-independent forward dependences (e.g. a
130 // read and write of A[i]), LAA will locally deem the dependence "safe"
131 // without querying the MemoryDepChecker. Therefore we can miss
132 // enumerating loop-independent forward dependences in
133 // getDependences. Note that as soon as there are different
134 // indices used to access the same array, the MemoryDepChecker *is*
135 // queried and the dependence list is complete.
136 Forward,
137 // Forward, but if vectorized, is likely to prevent store-to-load
138 // forwarding.
139 ForwardButPreventsForwarding,
140 // Lexically backward.
141 Backward,
142 // Backward, but the distance allows a vectorization factor dependent on
143 // MinDepDistBytes.
144 BackwardVectorizable,
145 // Same, but may prevent store-to-load forwarding.
146 BackwardVectorizableButPreventsForwarding
147 };
148
149 /// String version of the types.
150 static const char *DepName[];
151
152 /// Index of the source of the dependence in the InstMap vector.
153 unsigned Source;
154 /// Index of the destination of the dependence in the InstMap vector.
155 unsigned Destination;
156 /// The type of the dependence.
157 DepType Type;
158
159 Dependence(unsigned Source, unsigned Destination, DepType Type)
160 : Source(Source), Destination(Destination), Type(Type) {}
161
162 /// Return the source instruction of the dependence.
163 Instruction *getSource(const LoopAccessInfo &LAI) const;
164 /// Return the destination instruction of the dependence.
165 Instruction *getDestination(const LoopAccessInfo &LAI) const;
166
167 /// Dependence types that don't prevent vectorization.
168 static VectorizationSafetyStatus isSafeForVectorization(DepType Type);
169
170 /// Lexically forward dependence.
171 bool isForward() const;
172 /// Lexically backward dependence.
173 bool isBackward() const;
174
175 /// May be a lexically backward dependence type (includes Unknown).
176 bool isPossiblyBackward() const;
177
178 /// Print the dependence. \p Instr is used to map the instruction
179 /// indices to instructions.
180 void print(raw_ostream &OS, unsigned Depth,
181 const SmallVectorImpl<Instruction *> &Instrs) const;
182 };
183
184 MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
185 : PSE(PSE), InnermostLoop(L) {}
186
187 /// Register the location (instructions are given increasing numbers)
188 /// of a write access.
189 void addAccess(StoreInst *SI);
190
191 /// Register the location (instructions are given increasing numbers)
192 /// of a read access.
193 void addAccess(LoadInst *LI);
194
195 /// Check whether the dependencies between the accesses are safe.
196 ///
197 /// Only checks sets with elements in \p CheckDeps.
198 bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
199 const DenseMap<Value *, const SCEV *> &Strides,
200 const DenseMap<Value *, SmallVector<const Value *, 16>>
201 &UnderlyingObjects);
202
203 /// No memory dependence was encountered that would inhibit
204 /// vectorization.
205 bool isSafeForVectorization() const {
206 return Status == VectorizationSafetyStatus::Safe;
207 }
208
209 /// Return true if the number of elements that are safe to operate on
210 /// simultaneously is not bounded.
211 bool isSafeForAnyVectorWidth() const {
212 return MaxSafeVectorWidthInBits == UINT_MAX;
213 }
214
215 /// Return the number of elements that are safe to operate on
216 /// simultaneously, multiplied by the size of the element in bits.
217 uint64_t getMaxSafeVectorWidthInBits() const {
218 return MaxSafeVectorWidthInBits;
219 }
220
221 /// In some cases, when the dependency check fails, we can still
222 /// vectorize the loop with a dynamic array access check.
223 bool shouldRetryWithRuntimeCheck() const {
224 return FoundNonConstantDistanceDependence &&
225 Status == VectorizationSafetyStatus::PossiblySafeWithRtChecks;
226 }
227
228 /// Returns the memory dependences. If null is returned we exceeded
229 /// the MaxDependences threshold and this information is not
230 /// available.
231 const SmallVectorImpl<Dependence> *getDependences() const {
232 return RecordDependences ? &Dependences : nullptr;
233 }
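// Illustrative sketch (editorial addition, not part of the original header):
// a client holding a populated MemoryDepChecker `DepChecker` and the owning
// LoopAccessInfo `LAI` could walk the recorded dependences roughly like this:
//   if (const auto *Deps = DepChecker.getDependences())
//     for (const MemoryDepChecker::Dependence &D : *Deps)
//       if (D.Type == MemoryDepChecker::Dependence::Unknown)
//         D.print(errs(), /*Depth=*/2, DepChecker.getMemoryInstructions());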
234
235 void clearDependences() { Dependences.clear(); }
236
237 /// The vector of memory access instructions. The indices are used as
238 /// instruction identifiers in the Dependence class.
239 const SmallVectorImpl<Instruction *> &getMemoryInstructions() const {
240 return InstMap;
241 }
242
243 /// Generate a mapping between the memory instructions and their
244 /// indices according to program order.
245 DenseMap<Instruction *, unsigned> generateInstructionOrderMap() const {
246 DenseMap<Instruction *, unsigned> OrderMap;
247
248 for (unsigned I = 0; I < InstMap.size(); ++I)
249 OrderMap[InstMap[I]] = I;
250
251 return OrderMap;
252 }
253
254 /// Find the set of instructions that read or write via \p Ptr.
255 SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
256 bool isWrite) const;
257
258 /// Return the program order indices for the access location (Ptr, IsWrite).
259 /// Returns an empty ArrayRef if there are no accesses for the location.
260 ArrayRef<unsigned> getOrderForAccess(Value *Ptr, bool IsWrite) const {
261 auto I = Accesses.find({Ptr, IsWrite});
262 if (I != Accesses.end())
263 return I->second;
264 return {};
265 }
266
267 const Loop *getInnermostLoop() const { return InnermostLoop; }
268
269private:
270 /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and
271 /// applies dynamic knowledge to simplify SCEV expressions and convert them
272 /// to a more usable form. We need this in case assumptions about SCEV
273 /// expressions need to be made in order to avoid unknown dependences. For
274 /// example we might assume a unit stride for a pointer in order to prove
275 /// that a memory access is strided and doesn't wrap.
276 PredicatedScalarEvolution &PSE;
277 const Loop *InnermostLoop;
278
279 /// Maps access locations (ptr, read/write) to program order.
281
282 /// Memory access instructions in program order.
284
285 /// The program order index to be used for the next instruction.
286 unsigned AccessIdx = 0;
287
288 /// The smallest dependence distance in bytes in the loop. This may not be
289 /// the same as the maximum number of bytes that are safe to operate on
290 /// simultaneously.
291 uint64_t MinDepDistBytes = 0;
292
293 /// Number of elements (from consecutive iterations) that are safe to
294 /// operate on simultaneously, multiplied by the size of the element in bits.
295 /// The size of the element is taken from the memory access that is most
296 /// restrictive.
297 uint64_t MaxSafeVectorWidthInBits = -1U;
298
299 /// If we see a non-constant dependence distance we can still try to
300 /// vectorize this loop with runtime checks.
301 bool FoundNonConstantDistanceDependence = false;
302
303 /// Result of the dependence checks, indicating whether the checked
304 /// dependences are safe for vectorization, require RT checks or are known to
305 /// be unsafe.
306 VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe;
307
308 /// True if Dependences reflects the dependences in the
309 /// loop. If false we exceeded MaxDependences and
310 /// Dependences is invalid.
311 bool RecordDependences = true;
312
313 /// Memory dependences collected during the analysis. Only valid if
314 /// RecordDependences is true.
315 SmallVector<Dependence, 8> Dependences;
316
317 /// Check whether there is a plausible dependence between the two
318 /// accesses.
319 ///
320 /// Access \p A must happen before \p B in program order. The two indices
321 /// identify the index into the program order map.
322 ///
323 /// This function checks whether there is a plausible dependence (or the
324 /// absence of such can't be proved) between the two accesses. If there is a
325 /// plausible dependence but the dependence distance is bigger than one
326 /// element access it records this distance in \p MinDepDistBytes (if this
327 /// distance is smaller than any other distance encountered so far).
328 /// Otherwise, this function returns true signaling a possible dependence.
329 Dependence::DepType
330 isDependent(const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
331 unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
332 const DenseMap<Value *, SmallVector<const Value *, 16>>
333 &UnderlyingObjects);
334
335 /// Check whether the data dependence could prevent store-load
336 /// forwarding.
337 ///
338 /// \return false if we shouldn't vectorize at all or avoid larger
339 /// vectorization factors by limiting MinDepDistBytes.
340 bool couldPreventStoreLoadForward(uint64_t Distance, uint64_t TypeByteSize);
341
342 /// Updates the current safety status with \p S. We can go from Safe to
343 /// either PossiblySafeWithRtChecks or Unsafe and from
344 /// PossiblySafeWithRtChecks to Unsafe.
345 void mergeInStatus(VectorizationSafetyStatus S);
346};
347
348class RuntimePointerChecking;
349/// A grouping of pointers. A single memcheck is required between
350/// two groups.
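/// Illustrative example (editorial sketch, not part of the original comment):
/// if %a and %a + 256 end up in one group G1 and %b in another group G2, a
/// single overlap test over the two address ranges, roughly
///   (G1.Low < G2.High) && (G2.Low < G1.High),
/// replaces one comparison per pointer pair.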
351struct RuntimeCheckingPtrGroup {
352 /// Create a new pointer checking group containing a single
353 /// pointer, with index \p Index in RtCheck.
354 RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck);
355
356 /// Tries to add the pointer recorded in RtCheck at index
357 /// \p Index to this pointer checking group. We can only add a pointer
358 /// to a checking group if we will still be able to get
359 /// the upper and lower bounds of the check. Returns true in case
360 /// of success, false otherwise.
361 bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck);
362 bool addPointer(unsigned Index, const SCEV *Start, const SCEV *End,
363 unsigned AS, bool NeedsFreeze, ScalarEvolution &SE);
364
365 /// The SCEV expression which represents the upper bound of all the
366 /// pointers in this group.
367 const SCEV *High;
368 /// The SCEV expression which represents the lower bound of all the
369 /// pointers in this group.
370 const SCEV *Low;
371 /// Indices of all the pointers that constitute this grouping.
372 SmallVector<unsigned, 2> Members;
373 /// Address space of the involved pointers.
374 unsigned AddressSpace;
375 /// Whether the pointer needs to be frozen after expansion, e.g. because it
376 /// may be poison outside the loop.
377 bool NeedsFreeze = false;
378};
379
380/// A memcheck which is made up of a pair of grouped pointers.
381typedef std::pair<const RuntimeCheckingPtrGroup *,
382 const RuntimeCheckingPtrGroup *>
383 RuntimePointerCheck;
384
385struct PointerDiffInfo {
386 const SCEV *SrcStart;
387 const SCEV *SinkStart;
388 unsigned AccessSize;
389 bool NeedsFreeze;
390
391 PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
392 unsigned AccessSize, bool NeedsFreeze)
393 : SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
394 NeedsFreeze(NeedsFreeze) {}
395};
396
397/// Holds information about the memory runtime legality checks to verify
398/// that a group of pointers do not overlap.
399class RuntimePointerChecking {
400 friend struct RuntimeCheckingPtrGroup;
401
402public:
403 struct PointerInfo {
404 /// Holds the pointer value that we need to check.
405 TrackingVH<Value> PointerValue;
406 /// Holds the smallest byte address accessed by the pointer throughout all
407 /// iterations of the loop.
408 const SCEV *Start;
409 /// Holds the largest byte address accessed by the pointer throughout all
410 /// iterations of the loop, plus 1.
411 const SCEV *End;
412 /// Holds the information if this pointer is used for writing to memory.
413 bool IsWritePtr;
414 /// Holds the id of the set of pointers that could be dependent because of a
415 /// shared underlying object.
416 unsigned DependencySetId;
417 /// Holds the id of the disjoint alias set to which this pointer belongs.
418 unsigned AliasSetId;
419 /// SCEV for the access.
420 const SCEV *Expr;
421 /// True if the pointer expression needs to be frozen after expansion.
422 bool NeedsFreeze;
423
424 PointerInfo(Value *PointerValue, const SCEV *Start, const SCEV *End,
425 bool IsWritePtr, unsigned DependencySetId, unsigned AliasSetId,
426 const SCEV *Expr, bool NeedsFreeze)
427 : PointerValue(PointerValue), Start(Start), End(End),
428 IsWritePtr(IsWritePtr), DependencySetId(DependencySetId),
429 AliasSetId(AliasSetId), Expr(Expr), NeedsFreeze(NeedsFreeze) {}
430 };
431
432 RuntimePointerChecking(MemoryDepChecker &DC, ScalarEvolution *SE)
433 : DC(DC), SE(SE) {}
434
435 /// Reset the state of the pointer runtime information.
436 void reset() {
437 Need = false;
438 Pointers.clear();
439 Checks.clear();
440 }
441
442 /// Insert a pointer and calculate the start and end SCEVs.
443 /// We need \p PSE in order to compute the SCEV expression of the pointer
444 /// according to the assumptions that we've made during the analysis.
445 /// The method might also version the pointer stride according to \p Strides,
446 /// and add new predicates to \p PSE.
447 void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy,
448 bool WritePtr, unsigned DepSetId, unsigned ASId,
449 PredicatedScalarEvolution &PSE, bool NeedsFreeze);
450
451 /// No run-time memory checking is necessary.
452 bool empty() const { return Pointers.empty(); }
453
454 /// Generate the checks and store it. This also performs the grouping
455 /// of pointers to reduce the number of memchecks necessary.
456 void generateChecks(MemoryDepChecker::DepCandidates &DepCands,
457 bool UseDependencies);
458
459 /// Returns the checks that generateChecks created. They can be used to ensure
460 /// no read/write accesses overlap across all loop iterations.
461 const SmallVectorImpl<RuntimePointerCheck> &getChecks() const {
462 return Checks;
463 }
464
465 // Returns an optional list of (pointer-difference expressions, access size)
466 // pairs that can be used to prove that there are no vectorization-preventing
467 // dependencies at runtime. There is a vectorization-preventing dependency
468 // if any pointer-difference is <u VF * InterleaveCount * access size. Returns
469 // std::nullopt if pointer-difference checks cannot be used.
470 std::optional<ArrayRef<PointerDiffInfo>> getDiffChecks() const {
471 if (!CanUseDiffCheck)
472 return std::nullopt;
473 return {DiffChecks};
474 }
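// Worked example (editorial addition, not part of the original header): for a
// single PointerDiffInfo entry with AccessSize = 4 (an i32 access), choosing
// VF = 4 and an interleave count of 2 requires the runtime test
//   (SinkStart - SrcStart) >=u 4 * 2 * 4, i.e. a difference of at least 32
// bytes; otherwise a vectorization-preventing dependency may exist.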
475
476 /// Decide if we need to add a check between two groups of pointers,
477 /// according to needsChecking.
478 bool needsChecking(const RuntimeCheckingPtrGroup &M,
479 const RuntimeCheckingPtrGroup &N) const;
480
481 /// Returns the number of run-time checks required according to
482 /// needsChecking.
483 unsigned getNumberOfChecks() const { return Checks.size(); }
484
486 /// Print the list of run-time memory checks necessary.
486 void print(raw_ostream &OS, unsigned Depth = 0) const;
487
488 /// Print \p Checks.
489 void printChecks(raw_ostream &OS,
490 const SmallVectorImpl<RuntimePointerCheck> &Checks,
491 unsigned Depth = 0) const;
492
493 /// This flag indicates if we need to add the runtime check.
494 bool Need = false;
495
496 /// Information about the pointers that may require checking.
497 SmallVector<PointerInfo, 2> Pointers;
498
499 /// Holds a partitioning of pointers into "check groups".
500 SmallVector<RuntimeCheckingPtrGroup, 2> CheckingGroups;
501
502 /// Check if pointers are in the same partition
503 ///
504 /// \p PtrToPartition contains the partition number for pointers (-1 if the
505 /// pointer belongs to multiple partitions).
506 static bool
507 arePointersInSamePartition(const SmallVectorImpl<int> &PtrToPartition,
508 unsigned PtrIdx1, unsigned PtrIdx2);
509
510 /// Decide whether we need to issue a run-time check for the pointers at
511 /// indices \p I and \p J to prove their independence.
512 bool needsChecking(unsigned I, unsigned J) const;
513
514 /// Return PointerInfo for pointer at index \p PtrIdx.
515 const PointerInfo &getPointerInfo(unsigned PtrIdx) const {
516 return Pointers[PtrIdx];
517 }
518
519 ScalarEvolution *getSE() const { return SE; }
520
521private:
522 /// Groups pointers such that a single memcheck is required
523 /// between two different groups. This will clear the CheckingGroups vector
524 /// and re-compute it. We will only group dependencies if \p UseDependencies
525 /// is true, otherwise we will create a separate group for each pointer.
526 void groupChecks(MemoryDepChecker::DepCandidates &DepCands,
527 bool UseDependencies);
528
529 /// Generate the checks and return them.
531
532 /// Try to add a new (pointer-difference, access size) pair to
533 /// DiffCheck for checking groups \p CGI and \p CGJ. If pointer-difference
534 /// checks cannot be used for the groups, set CanUseDiffCheck to false.
535 void tryToCreateDiffCheck(const RuntimeCheckingPtrGroup &CGI,
536 const RuntimeCheckingPtrGroup &CGJ);
537
538 MemoryDepChecker &DC;
539
540 /// Holds a pointer to the ScalarEvolution analysis.
541 ScalarEvolution *SE;
542
543 /// Set of run-time checks required to establish independence of
544 /// otherwise may-aliasing pointers in the loop.
546
547 /// Flag indicating if pointer-difference checks can be used
548 bool CanUseDiffCheck = true;
549
550 /// A list of (pointer-difference, access size) pairs that can be used to
551 /// prove that there are no vectorization-preventing dependencies.
552 SmallVector<PointerDiffInfo> DiffChecks;
553};
554
555/// Drive the analysis of memory accesses in the loop
556///
557/// This class is responsible for analyzing the memory accesses of a loop. It
558/// collects the accesses and then its main helper, the AccessAnalysis class,
559/// finds and categorizes the dependences in buildDependenceSets.
560///
561/// For memory dependences that can be analyzed at compile time, it determines
562/// whether the dependence is part of a cycle inhibiting vectorization. This
563/// work is delegated to the MemoryDepChecker class.
564///
565/// For memory dependences that cannot be determined at compile time, it
566/// generates run-time checks to prove independence. This is done by
567/// AccessAnalysis::canCheckPtrAtRT and the checks are maintained by the
568/// RuntimePointerCheck class.
569///
570/// If pointers can wrap or can't be expressed as affine AddRec expressions by
571/// ScalarEvolution, we will generate run-time checks by emitting a
572/// SCEVUnionPredicate.
573///
574/// Checks for both memory dependences and the SCEV predicates contained in the
575/// PSE must be emitted in order for the results of this analysis to be valid.
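///
/// Illustrative usage sketch (editorial addition, not part of the original
/// comment), assuming `LAI` was obtained from a LoopAccessInfoManager:
///   if (LAI.canVectorizeMemory() &&
///       LAI.getRuntimePointerChecking()->getNumberOfChecks() == 0 &&
///       LAI.getPSE().getPredicate().isAlwaysTrue()) {
///     // Neither memchecks nor SCEV predicates need to be emitted here.
///   }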
576class LoopAccessInfo {
577public:
578 LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
579 AAResults *AA, DominatorTree *DT, LoopInfo *LI);
580
581 /// Return true if we can analyze the memory accesses in the loop and there
582 /// are no memory dependence cycles.
583 bool canVectorizeMemory() const { return CanVecMem; }
584
585 /// Return true if there is a convergent operation in the loop. There may
586 /// still be reported runtime pointer checks that would be required, but it is
587 /// not legal to insert them.
588 bool hasConvergentOp() const { return HasConvergentOp; }
589
590 const RuntimePointerChecking *getRuntimePointerChecking() const {
591 return PtrRtChecking.get();
592 }
593
594 /// Number of memchecks required to prove independence of otherwise
595 /// may-alias pointers.
596 unsigned getNumRuntimePointerChecks() const {
597 return PtrRtChecking->getNumberOfChecks();
598 }
599
600 /// Return true if the block BB needs to be predicated in order for the loop
601 /// to be vectorized.
602 static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
603 DominatorTree *DT);
604
605 /// Returns true if value \p V is loop invariant.
606 bool isInvariant(Value *V) const;
607
608 unsigned getNumStores() const { return NumStores; }
609 unsigned getNumLoads() const { return NumLoads; }
610
611 /// The diagnostics report generated for the analysis. E.g. why we
612 /// couldn't analyze the loop.
613 const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
614
615 /// The Memory Dependence Checker, which can determine the
616 /// loop-independent and loop-carried dependences between memory accesses.
617 const MemoryDepChecker &getDepChecker() const { return *DepChecker; }
618
619 /// Return the list of instructions that use \p Ptr to read or write
620 /// memory.
621 SmallVector<Instruction *, 4> getInstructionsForAccess(Value *Ptr,
622 bool isWrite) const {
623 return DepChecker->getInstructionsForAccess(Ptr, isWrite);
624 }
625
626 /// If an access has a symbolic stride, this maps the pointer value to
627 /// the stride symbol.
628 const DenseMap<Value *, const SCEV *> &getSymbolicStrides() const {
629 return SymbolicStrides;
630 }
631
632 /// Print the information about the memory accesses in the loop.
633 void print(raw_ostream &OS, unsigned Depth = 0) const;
634
635 /// If the loop has a memory dependence involving an invariant address, i.e.
636 /// two stores or a store and a load, then return true, else return false.
637 bool hasDependenceInvolvingLoopInvariantAddress() const {
638 return HasDependenceInvolvingLoopInvariantAddress;
639 }
640
641 /// Return the list of stores to invariant addresses.
642 ArrayRef<StoreInst *> getStoresToInvariantAddresses() const {
643 return StoresToInvariantAddresses;
644 }
645
646 /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts
647 /// them to a more usable form. All SCEV expressions during the analysis
648 /// should be re-written (and therefore simplified) according to PSE.
649 /// A user of LoopAccessAnalysis will need to emit the runtime checks
650 /// associated with this predicate.
651 const PredicatedScalarEvolution &getPSE() const { return *PSE; }
652
653private:
654 /// Analyze the loop.
655 void analyzeLoop(AAResults *AA, LoopInfo *LI,
656 const TargetLibraryInfo *TLI, DominatorTree *DT);
657
658 /// Check if the structure of the loop allows it to be analyzed by this
659 /// pass.
660 bool canAnalyzeLoop();
661
662 /// Save the analysis remark.
663 ///
664 /// LAA does not emit the remarks directly. Instead it stores them so that
665 /// the client can retrieve and present them as its own analysis
666 /// (e.g. -Rpass-analysis=loop-vectorize).
667 OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
668 Instruction *Instr = nullptr);
669
670 /// Collect memory accesses with loop-invariant strides.
671 ///
672 /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
673 /// invariant.
674 void collectStridedAccess(Value *LoadOrStoreInst);
675
676 // Emits a remark for the first unsafe memory dependence in a loop.
677 // Emits nothing if there are no unsafe dependences
678 // or if the dependences were not recorded.
679 void emitUnsafeDependenceRemark();
680
681 std::unique_ptr<PredicatedScalarEvolution> PSE;
682
683 /// We need to check that all of the pointers in this list are disjoint
684 /// at runtime. Using std::unique_ptr to make using move ctor simpler.
685 std::unique_ptr<RuntimePointerChecking> PtrRtChecking;
686
687 /// The Memory Dependence Checker, which can determine the
688 /// loop-independent and loop-carried dependences between memory accesses.
689 std::unique_ptr<MemoryDepChecker> DepChecker;
690
691 Loop *TheLoop;
692
693 unsigned NumLoads = 0;
694 unsigned NumStores = 0;
695
696 /// Cache the result of analyzeLoop.
697 bool CanVecMem = false;
698 bool HasConvergentOp = false;
699
700 /// Indicator that there are non-vectorizable stores to a uniform address.
701 bool HasDependenceInvolvingLoopInvariantAddress = false;
702
703 /// List of stores to invariant addresses.
704 SmallVector<StoreInst *> StoresToInvariantAddresses;
705
706 /// The diagnostics report generated for the analysis. E.g. why we
707 /// couldn't analyze the loop.
708 std::unique_ptr<OptimizationRemarkAnalysis> Report;
709
710 /// If an access has a symbolic stride, this maps the pointer value to
711 /// the stride symbol.
712 DenseMap<Value *, const SCEV *> SymbolicStrides;
713};
714
715/// Return the SCEV corresponding to a pointer with the symbolic stride
716/// replaced with constant one, assuming the SCEV predicate associated with
717/// \p PSE is true.
718///
719/// If necessary this method will version the stride of the pointer according
720/// to \p PtrToStride and therefore add further predicates to \p PSE.
721///
722/// \p PtrToStride provides the mapping between the pointer value and its
723/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
724const SCEV *
725replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
726 const DenseMap<Value *, const SCEV *> &PtrToStride,
727 Value *Ptr);
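// Illustrative example (editorial addition, not part of the original header):
// for an access a[i * Stride] where `Stride` is loop-invariant but not a
// compile-time constant, the mapping collected by collectStridedAccess lets
// this helper rewrite the pointer SCEV as if Stride were 1, adding a
// "Stride == 1" predicate to \p PSE that the caller must later emit as a
// runtime check.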
728
729/// If the pointer has a constant stride return it in units of the access type
730/// size. Otherwise return std::nullopt.
731///
732/// Ensure that it does not wrap in the address space, assuming the predicate
733/// associated with \p PSE is true.
734///
735/// If necessary this method will version the stride of the pointer according
736/// to \p PtrToStride and therefore add further predicates to \p PSE.
737/// The \p Assume parameter indicates if we are allowed to make additional
738/// run-time assumptions.
739///
740/// Note that the analysis results are defined if-and-only-if the original
741/// memory access was defined. If that access was dead, or UB, then the
742/// result of this function is undefined.
743std::optional<int64_t>
744getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
745 const Loop *Lp,
746 const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(),
747 bool Assume = false, bool ShouldCheckWrap = true);
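// Illustrative example (editorial addition, not part of the original header):
// for an i32 access A[3 * i] inside \p Lp the returned stride is 3 (in units
// of the 4-byte access type), for A[i] it is 1, and for a subscript that is
// not an affine AddRec, such as A[B[i]], the result is std::nullopt.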
748
749/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
750/// compatible and it is possible to calculate the distance between them. This
751/// is a simple API that does not depend on the analysis pass.
752/// \param StrictCheck Ensure that the calculated distance matches the
753/// type-based one after removing all bitcasts from the provided pointers.
754std::optional<int> getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB,
755 Value *PtrB, const DataLayout &DL,
756 ScalarEvolution &SE,
757 bool StrictCheck = false,
758 bool CheckType = true);
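// Illustrative example (editorial addition, not part of the original header):
// with i32 element types, getPointersDiff(ElemTy, &A[2], ElemTy, &A[5], DL,
// SE) evaluates to 3 (a distance in elements, not bytes), while pointers
// based on unrelated objects yield std::nullopt.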
759
760/// Attempt to sort the pointers in \p VL and return the sorted indices
761/// in \p SortedIndices, if reordering is required.
762///
763/// Returns 'true' if sorting is legal, otherwise returns 'false'.
764///
765/// For example, for a given \p VL of memory accesses in program order, a[i+4],
766/// a[i+0], a[i+1] and a[i+7], this function will treat \p VL as sorted into
767/// a[i+0], a[i+1], a[i+4], a[i+7] and save the positions of the original
768/// accesses in that sorted order in \p SortedIndices as <1,2,0,3>.
770bool sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy, const DataLayout &DL,
771 ScalarEvolution &SE,
772 SmallVectorImpl<unsigned> &SortedIndices);
773
774/// Returns true if the memory operations \p A and \p B are consecutive.
775/// This is a simple API that does not depend on the analysis pass.
776bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
777 ScalarEvolution &SE, bool CheckType = true);
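// Illustrative example (editorial addition, not part of the original header):
// for two i32 loads from &A[7] and &A[8], isConsecutiveAccess(LoadA, LoadB,
// DL, SE) returns true; it is a thin wrapper around getPointersDiff that
// checks for a distance of exactly one element.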
778
779class LoopAccessInfoManager {
780 /// The cache.
781 DenseMap<Loop *, std::unique_ptr<LoopAccessInfo>> LoopAccessInfoMap;
782
783 // The used analysis passes.
784 ScalarEvolution &SE;
785 AAResults &AA;
786 DominatorTree &DT;
787 LoopInfo &LI;
788 const TargetLibraryInfo *TLI = nullptr;
789
790public:
791 LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT,
792 LoopInfo &LI, const TargetLibraryInfo *TLI)
793 : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI) {}
794
795 const LoopAccessInfo &getInfo(Loop &L);
796
797 void clear() { LoopAccessInfoMap.clear(); }
798
799 bool invalidate(Function &F, const PreservedAnalyses &PA,
800 FunctionAnalysisManager::Invalidator &Inv);
801};
802
803/// This analysis provides dependence information for the memory
804/// accesses of a loop.
805///
806/// It runs the analysis for a loop on demand. This can be initiated by
807/// querying the loop access info via AM.getResult<LoopAccessAnalysis>.
808/// getResult returns a LoopAccessInfo object. See this class for the
809/// specifics of what information is provided.
810class LoopAccessAnalysis
811 : public AnalysisInfoMixin<LoopAccessAnalysis> {
812 friend AnalysisInfoMixin<LoopAccessAnalysis>;
813 static AnalysisKey Key;
814
815public:
816 using Result = LoopAccessInfoManager;
817
818 Result run(Function &F, FunctionAnalysisManager &AM);
819};
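// Illustrative usage sketch (editorial addition, not part of the original
// header): a function pass can query the analysis through the new pass
// manager roughly like this:
//   LoopAccessInfoManager &LAIs = AM.getResult<LoopAccessAnalysis>(F);
//   for (Loop *L : LI) {
//     const LoopAccessInfo &LAI = LAIs.getInfo(*L);
//     // ... consult LAI.canVectorizeMemory(), runtime checks, etc.
//   }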
820
821inline Instruction *MemoryDepChecker::Dependence::getSource(
822 const LoopAccessInfo &LAI) const {
823 return LAI.getDepChecker().getMemoryInstructions()[Source];
824}
825
826inline Instruction *MemoryDepChecker::Dependence::getDestination(
827 const LoopAccessInfo &LAI) const {
828 return LAI.getDepChecker().getMemoryInstructions()[Destination];
829}
830
831} // End llvm namespace
832
833#endif