LLVM 23.0.0git
LoopVectorizationLegality.h
Go to the documentation of this file.
1//===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file defines the LoopVectorizationLegality class. Original code
11/// in Loop Vectorizer has been moved out to its own file for modularity
12/// and reusability.
13///
14/// Currently, it works for innermost loop vectorization. Extending this to
15/// outer loop vectorization is a TODO item.
16///
17/// Also provides:
18/// 1) LoopVectorizeHints class which keeps a number of loop annotations
19/// locally for easy look up. It has the ability to write them back as
20/// loop metadata, upon request.
21/// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22/// of reporting useful failure to vectorize message.
23//
24//===----------------------------------------------------------------------===//
25
26#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28
29#include "llvm/ADT/MapVector.h"
33
34namespace llvm {
35class AssumptionCache;
36class BasicBlock;
38class DemandedBits;
39class DominatorTree;
40class Function;
41class Loop;
42class LoopInfo;
43class Metadata;
49class Type;
50
51/// Utility class for getting and setting loop vectorizer hints in the form
52/// of loop metadata.
53/// This class keeps a number of loop annotations locally (as member variables)
54/// and can, upon request, write them back as metadata on the loop. It will
55/// initially scan the loop for existing metadata, and will update the local
56/// values based on information in the loop.
57/// We cannot write all values to metadata, as the mere presence of some info,
58/// for example 'force', means a decision has been made. So, we need to be
59/// careful NOT to add them if the user hasn't specifically asked so.
61 enum HintKind {
62 HK_WIDTH,
63 HK_INTERLEAVE,
64 HK_FORCE,
65 HK_ISVECTORIZED,
66 HK_PREDICATE,
67 HK_SCALABLE
68 };
69
70 /// Hint - associates name and validation with the hint value.
71 struct Hint {
72 const char *Name;
73 unsigned Value; // This may have to change for non-numeric values.
74 HintKind Kind;
75
76 Hint(const char *Name, unsigned Value, HintKind Kind)
77 : Name(Name), Value(Value), Kind(Kind) {}
78
79 LLVM_ABI bool validate(unsigned Val);
80 };
81
82 /// Vectorization width.
83 Hint Width;
84
85 /// Vectorization interleave factor.
86 Hint Interleave;
87
88 /// Vectorization forced
89 Hint Force;
90
91 /// Already Vectorized
92 Hint IsVectorized;
93
94 /// Vector Predicate
95 Hint Predicate;
96
97 /// Says whether we should use fixed width or scalable vectorization.
98 Hint Scalable;
99
100 /// Return the loop metadata prefix.
101 static StringRef Prefix() { return "llvm.loop."; }
102
103 /// True if there is any unsafe math in the loop.
104 bool PotentiallyUnsafe = false;
105
106public:
108 FK_Undefined = -1, ///< Not selected.
109 FK_Disabled = 0, ///< Forcing disabled.
110 FK_Enabled = 1, ///< Forcing enabled.
111 };
112
114 /// Not selected.
116 /// Disables vectorization with scalable vectors.
118 /// Vectorize loops using scalable vectors or fixed-width vectors, but favor
119 /// scalable vectors when the cost-model is inconclusive. This is the
120 /// default when the scalable.enable hint is enabled through a pragma.
122 /// Always vectorize loops using scalable vectors if feasible (i.e. the plan
123 /// has a valid cost and is not restricted by fixed-length dependence
124 /// distances).
126 };
127
128 LLVM_ABI LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
130 const TargetTransformInfo *TTI = nullptr);
131
132 /// Mark the loop L as already vectorized by setting the width to 1.
134
136 bool VectorizeOnlyWhenForced) const;
137
138 /// Dumps all the hint information.
139 LLVM_ABI void emitRemarkWithHints() const;
140
142 return ElementCount::get(
143 Width.Value,
144 (ScalableForceKind)Scalable.Value == SK_PreferScalable ||
145 (ScalableForceKind)Scalable.Value == SK_AlwaysScalable);
146 }
147
148 unsigned getInterleave() const {
149 if (Interleave.Value)
150 return Interleave.Value;
151 // If interleaving is not explicitly set, assume that if we do not want
152 // unrolling, we also don't want any interleaving.
154 return 1;
155 return 0;
156 }
157 unsigned getIsVectorized() const { return IsVectorized.Value; }
158 unsigned getPredicate() const { return Predicate.Value; }
159 enum ForceKind getForce() const {
160 if ((ForceKind)Force.Value == FK_Undefined &&
162 return FK_Disabled;
163 return (ForceKind)Force.Value;
164 }
165
166 /// \return true if scalable vectorization has been explicitly disabled.
168 return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
169 }
170
171 /// \return true if scalable vectorization is always preferred over
172 /// fixed-length when feasible, regardless of cost.
174 return (ScalableForceKind)Scalable.Value == SK_AlwaysScalable;
175 }
176
177 /// When enabling loop hints are provided we allow the vectorizer to change
178 /// the order of operations that is given by the scalar loop. This is not
179 /// enabled by default because can be unsafe or inefficient. For example,
180 /// reordering floating-point operations will change the way round-off
181 /// error accumulates in the loop.
182 LLVM_ABI bool allowReordering() const;
183
184 bool isPotentiallyUnsafe() const {
185 // Avoid FP vectorization if the target is unsure about proper support.
186 // This may be related to the SIMD unit in the target not handling
187 // IEEE 754 FP ops properly, or bad single-to-double promotions.
188 // Otherwise, a sequence of vectorized loops, even without reduction,
189 // could lead to different end results on the destination vectors.
190 return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
191 }
192
193 void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
194
195private:
196 /// Find hints specified in the loop metadata and update local values.
197 void getHintsFromMetadata();
198
199 /// Checks string hint with one operand and set value if valid.
200 void setHint(StringRef Name, Metadata *Arg);
201
202 /// The loop these hints belong to.
203 const Loop *TheLoop;
204
205 /// Interface to emit optimization remarks.
207
208 /// Reports a condition where loop vectorization is disallowed: prints
209 /// \p DebugMsg for debugging purposes along with the corresponding
210 /// optimization remark \p RemarkName, with \p RemarkMsg as the user-facing
211 /// message. The loop \p L is used for the location of the remark.
212 void reportDisallowedVectorization(const StringRef DebugMsg,
213 const StringRef RemarkName,
214 const StringRef RemarkMsg,
215 const Loop *L) const;
216};
217
218/// This holds vectorization requirements that must be verified late in
219/// the process. The requirements are set by legalize and costmodel. Once
220/// vectorization has been determined to be possible and profitable the
221/// requirements can be verified by looking for metadata or compiler options.
222/// For example, some loops require FP commutativity which is only allowed if
223/// vectorization is explicitly specified or if the fast-math compiler option
224/// has been provided.
225/// Late evaluation of these requirements allows helpful diagnostics to be
226/// composed that tells the user what need to be done to vectorize the loop. For
227/// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
228/// evaluation should be used only when diagnostics can generated that can be
229/// followed by a non-expert user.
231public:
232 /// Track the 1st floating-point instruction that can not be reassociated.
234 if (I && !ExactFPMathInst)
235 ExactFPMathInst = I;
236 }
237
238 Instruction *getExactFPInst() { return ExactFPMathInst; }
239
240private:
241 Instruction *ExactFPMathInst = nullptr;
242};
243
244/// This holds details about a histogram operation -- a load -> update -> store
245/// sequence where each lane in a vector might be updating the same element as
246/// another lane.
255
256/// Indicates the characteristics of a loop with an uncountable exit.
257/// * None -- No uncountable exit present.
258/// * ReadOnly -- At least one uncountable exit in a readonly loop.
259/// * ReadWrite -- At least one uncountable exit in a loop with side effects
260/// that may require masking.
262
263/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
264/// to what vectorization factor.
265/// This class does not look at the profitability of vectorization, only the
266/// legality. This class has two main kinds of checks:
267/// * Memory checks - The code in canVectorizeMemory checks if vectorization
268/// will change the order of memory accesses in a way that will change the
269/// correctness of the program.
270/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
271/// checks for a number of different conditions, such as the availability of a
272/// single induction variable, that all types are supported and vectorize-able,
273/// etc. This code reflects the capabilities of InnerLoopVectorizer.
274/// This class is also used by InnerLoopVectorizer for identifying
275/// induction variable and the different reduction variables.
277public:
283 AssumptionCache *AC, bool AllowRuntimeSCEVChecks, AAResults *AA)
284 : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs),
285 ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
286 AllowRuntimeSCEVChecks(AllowRuntimeSCEVChecks), AA(AA) {}
287
288 /// ReductionList contains the reduction descriptors for all
289 /// of the reductions that were found in the loop.
291
292 /// InductionList saves induction variables and maps them to the
293 /// induction descriptor.
295
296 /// RecurrenceSet contains the phi nodes that are recurrences other than
297 /// inductions and reductions.
299
300 /// Returns true if it is legal to vectorize this loop.
301 /// This does not mean that it is profitable to vectorize this
302 /// loop, only that it is legal to do so.
303 /// Temporarily taking UseVPlanNativePath parameter. If true, take
304 /// the new code path being implemented for outer loop vectorization
305 /// (should be functional for inner loop vectorization) based on VPlan.
306 /// If false, good old LV code.
307 LLVM_ABI bool canVectorize(bool UseVPlanNativePath);
308
309 /// Returns true if it is legal to vectorize the FP math operations in this
310 /// loop. Vectorizing is legal if we allow reordering of FP operations, or if
311 /// we can use in-order reductions.
312 LLVM_ABI bool canVectorizeFPMath(bool EnableStrictReductions);
313
314 /// Return true if we can vectorize this loop while folding its tail by
315 /// masking.
316 LLVM_ABI bool canFoldTailByMasking() const;
317
318 /// Mark all respective loads/stores for masking. Must only be called when
319 /// tail-folding is possible.
321
322 /// Returns the primary induction variable.
323 PHINode *getPrimaryInduction() { return PrimaryInduction; }
324
325 /// Returns the reduction variables found in the loop.
326 const ReductionList &getReductionVars() const { return Reductions; }
327
328 /// Returns the recurrence descriptor associated with a given phi node \p PN,
329 /// expecting one to exist.
332 "only reductions have recurrence descriptors");
333 return Reductions.find(PN)->second;
334 }
335
336 /// Returns the induction variables found in the loop.
337 const InductionList &getInductionVars() const { return Inductions; }
338
339 /// Return the fixed-order recurrences found in the loop.
340 RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
341
342 /// Returns the widest induction type.
343 IntegerType *getWidestInductionType() { return WidestIndTy; }
344
345 /// Returns True if given store is a final invariant store of one of the
346 /// reductions found in the loop.
348
349 /// Returns True if given address is invariant and is used to store recurrent
350 /// expression
352
353 /// Returns True if V is a Phi node of an induction variable in this loop.
354 LLVM_ABI bool isInductionPhi(const Value *V) const;
355
356 /// Returns True if V is a cast that is part of an induction def-use chain,
357 /// and had been proven to be redundant under a runtime guard (in other
358 /// words, the cast has the same SCEV expression as the induction phi).
359 LLVM_ABI bool isCastedInductionVariable(const Value *V) const;
360
361 /// Returns True if V can be considered as an induction variable in this
362 /// loop. V can be the induction phi, or some redundant cast in the def-use
363 /// chain of the inducion phi.
364 LLVM_ABI bool isInductionVariable(const Value *V) const;
365
366 /// Returns True if PN is a reduction variable in this loop.
367 bool isReductionVariable(PHINode *PN) const { return Reductions.count(PN); }
368
369 /// Returns True if Phi is a fixed-order recurrence in this loop.
370 LLVM_ABI bool isFixedOrderRecurrence(const PHINode *Phi) const;
371
372 /// Return true if the block BB needs to be predicated in order for the loop
373 /// to be vectorized.
374 LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB) const;
375
376 /// Add unit stride predicates for memory accesses to PSE, if runtime checks
377 /// are allowed and an inner loop is vectorized.
379
380 /// Check if this pointer is consecutive when vectorizing. This happens
381 /// when the last index of the GEP is the induction variable, or that the
382 /// pointer itself is an induction variable.
383 /// This check allows us to vectorize A[idx] into a wide load/store.
384 /// Returns:
385 /// 0 - Stride is unknown or non-consecutive.
386 /// 1 - Address is consecutive.
387 /// -1 - Address is consecutive, and decreasing.
388 /// NOTE: This method must only be used before modifying the original scalar
389 /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
390 LLVM_ABI int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
391
392 /// Returns true if \p V is invariant across all loop iterations according to
393 /// SCEV.
394 LLVM_ABI bool isInvariant(Value *V) const;
395
396 /// Returns true if value V is uniform across \p VF lanes, when \p VF is
397 /// provided, and otherwise if \p V is invariant across all loop iterations.
398 LLVM_ABI bool isUniform(Value *V, std::optional<ElementCount> VF) const;
399
400 /// A uniform memory op is a load or store which accesses the same memory
401 /// location on all \p VF lanes, if \p VF is provided and otherwise if the
402 /// memory location is invariant.
404 std::optional<ElementCount> VF) const;
405
406 /// Returns the information that we collected about runtime memory check.
408 return LAI->getRuntimePointerChecking();
409 }
410
411 const LoopAccessInfo *getLAI() const { return LAI; }
412
414 return LAI->getDepChecker().isSafeForAnyVectorWidth() &&
415 LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
416 }
417
419 return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
420 }
421
422 /// Returns information about whether this loop contains at least one
423 /// uncountable early exit, and if so, if it also contains instructions (such
424 /// as stores) that cause side-effects.
426 return UncountableExitType;
427 }
428
429 /// Returns true if the loop has uncountable early exits, i.e. uncountable
430 /// exits that aren't the latch block.
434
435 /// Returns true if this is an early exit loop with state-changing or
436 /// potentially-faulting operations and the condition for the uncountable
437 /// exit must be determined before any of the state changes or potentially
438 /// faulting operations take place.
442
443 /// Return true if there is store-load forwarding dependencies.
445 return LAI->getDepChecker().isSafeForAnyStoreLoadForwardDistances();
446 }
447
448 /// Return safe power-of-2 number of elements, which do not prevent store-load
449 /// forwarding and safe to operate simultaneously.
451 return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
452 }
453
454 /// Returns true if instruction \p I requires a mask for vectorization.
455 /// This accounts for both control flow masking (conditionally executed
456 /// blocks) and tail-folding masking (predicated loop vectorization).
457 bool isMaskRequired(const Instruction *I, bool TailFolded) const {
458 if (TailFolded)
459 return TailFoldedMaskedOp.contains(I);
460 return ConditionallyExecutedOps.contains(I);
461 }
462
463 /// Returns true if there is at least one function call in the loop which
464 /// has a vectorized variant available.
465 bool hasVectorCallVariants() const { return VecCallVariantsFound; }
466
467 unsigned getNumStores() const { return LAI->getNumStores(); }
468 unsigned getNumLoads() const { return LAI->getNumLoads(); }
469
470 /// Returns a HistogramInfo* for the given instruction if it was determined
471 /// to be part of a load -> update -> store sequence where multiple lanes
472 /// may be working on the same memory address.
473 std::optional<const HistogramInfo *> getHistogramInfo(Instruction *I) const {
474 for (const HistogramInfo &HGram : Histograms)
475 if (HGram.Load == I || HGram.Update == I || HGram.Store == I)
476 return &HGram;
477
478 return std::nullopt;
479 }
480
481 /// Returns a list of all known histogram operations in the loop.
482 bool hasHistograms() const { return !Histograms.empty(); }
483
487
488 Loop *getLoop() const { return TheLoop; }
489
490 LoopInfo *getLoopInfo() const { return LI; }
491
492 AssumptionCache *getAssumptionCache() const { return AC; }
493
494 ScalarEvolution *getScalarEvolution() const { return PSE.getSE(); }
495
496 DominatorTree *getDominatorTree() const { return DT; }
497
498 /// Returns all exiting blocks with a countable exit, i.e. the
499 /// exit-not-taken count is known exactly at compile time.
501 return CountableExitingBlocks;
502 }
503
504private:
505 /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
506 /// its nested loops are considered legal for vectorization. These legal
507 /// checks are common for inner and outer loop vectorization.
508 /// Temporarily taking UseVPlanNativePath parameter. If true, take
509 /// the new code path being implemented for outer loop vectorization
510 /// (should be functional for inner loop vectorization) based on VPlan.
511 /// If false, good old LV code.
512 bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
513
514 /// Set up outer loop inductions by checking Phis in outer loop header for
515 /// supported inductions (int inductions). Return false if any of these Phis
516 /// is not a supported induction or if we fail to find an induction.
517 bool setupOuterLoopInductions();
518
519 /// Return true if the pre-header, exiting and latch blocks of \p Lp
520 /// (non-recursive) are considered legal for vectorization.
521 /// Temporarily taking UseVPlanNativePath parameter. If true, take
522 /// the new code path being implemented for outer loop vectorization
523 /// (should be functional for inner loop vectorization) based on VPlan.
524 /// If false, good old LV code.
525 bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath) const;
526
527 /// Check if a single basic block loop is vectorizable.
528 /// At this point we know that this is a loop with a constant trip count
529 /// and we only need to check individual instructions.
530 bool canVectorizeInstrs();
531
532 /// Check if an individual instruction is vectorizable.
533 bool canVectorizeInstr(Instruction &I);
534
535 /// When we vectorize loops we may change the order in which
536 /// we read and write from memory. This method checks if it is
537 /// legal to vectorize the code, considering only memory constrains.
538 /// Returns true if the loop is vectorizable
539 bool canVectorizeMemory();
540
541 /// If LAA cannot determine whether all dependences are safe, we may be able
542 /// to further analyse some IndirectUnsafe dependences and if they match a
543 /// certain pattern (like a histogram) then we may still be able to vectorize.
544 bool canVectorizeIndirectUnsafeDependences();
545
546 /// Return true if we can vectorize this loop using the IF-conversion
547 /// transformation.
548 bool canVectorizeWithIfConvert();
549
550 /// Return true if we can vectorize this outer loop. The method performs
551 /// specific checks for outer loop vectorization.
552 bool canVectorizeOuterLoop();
553
554 /// Returns true if this is an early exit loop that can be vectorized.
555 /// Currently, a loop with an uncountable early exit is considered
556 /// vectorizable if:
557 /// 1. Writes to memory will access different underlying objects than
558 /// any load used as part of the uncountable exit condition.
559 /// 2. The loop has only one early uncountable exit
560 /// 3. The early exit block dominates the latch block.
561 /// 4. The latch block has an exact exit count.
562 /// 5. The loop does not contain reductions or recurrences.
563 /// 6. We can prove at compile-time that loops will not contain faulting
564 /// loads, or that any faulting loads would also occur in a purely
565 /// scalar loop.
566 /// 7. It is safe to speculatively execute instructions such as divide or
567 /// call instructions.
568 /// The list above is not based on theoretical limitations of vectorization,
569 /// but simply a statement that more work is needed to support these
570 /// additional cases safely.
571 bool isVectorizableEarlyExitLoop();
572
573 /// When vectorizing an early exit loop containing side effects, we need to
574 /// determine whether an uncounted exit will be taken before any operation
575 /// that has side effects.
576 ///
577 /// Consider a loop like the following:
578 /// for (int i = 0; i < N; ++i) {
579 /// a[i] = b[i];
580 /// if (c[i] == 0)
581 /// break;
582 /// }
583 ///
584 /// We have both a load and a store operation occurring before the condition
585 /// is checked for early termination. We could potentially restrict
586 /// vectorization to cases where we know all addresses are guaranteed to be
587 /// dereferenceable, which would allow the load before the condition check to
588 /// be vectorized.
589 ///
590 /// The store, however, should not execute across all lanes if early
591 /// termination occurs before the end of the vector. We must only store to the
592 /// locations that would have been stored to by a scalar loop. So we need to
593 /// know what the result of 'c[i] == 0' is before performing the vector store,
594 /// with or without masking.
595 ///
596 /// We can either do this by moving the condition load to the top of the
597 /// vector body and using the comparison to create masks for other operations
598 /// in the loop, or by looking ahead one vector iteration and bailing out to
599 /// the scalar loop if an exit would occur.
600 ///
601 /// Using the latter approach (applicable to more targets), we need to hoist
602 /// the first load (of c[0]) out of the loop then rotate the load within the
603 /// loop to the next iteration, remembering to adjust the vector trip count.
604 /// Something like the following:
605 ///
606 /// vec.ph:
607 /// %ci.0 = load <4 x i32>, ptr %c
608 /// %cmp.0 = icmp eq <4 x i32> %ci.0, zeroinitializer
609 /// %any.of.0 = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.0)
610 /// br i1 %any.of.0, label %scalar.ph, label %vec.body
611 /// vec.body:
612 /// %iv = phi...
613 /// phi for c[i] if used elsewhere in the loop...
614 /// other operations in the loop...
615 /// %iv.next = add i64 %iv, 4
616 /// %addr.next = getelementptr i32, ptr %c, i64 %iv.next
617 /// %ci.next = load <4 x i32>, ptr %addr.next
618 /// %cmp.next = icmp eq <4 x i32> %ci.next, zeroinitializer
619 /// %any.of.next = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp.next)
620 /// iv.next compared with shortened vector tripcount...
621 /// uncountable condition combined with counted condition...
622 /// br...
623 ///
624 /// Doing this means the last few iterations will always be performed by a
625 /// scalar loop regardless of which exit is taken, and so vector iterations
626 /// will never execute a memory operation to a location that the scalar loop
627 /// would not have.
628 ///
629 /// This means we must ensure that it is safe to move the load for 'c[i]'
630 /// before other memory operations (or any other observable side effects) in
631 /// the loop.
632 ///
633 /// Currently, c[i] must have only one user (the comparison used for the
634 /// uncountable exit) since we would otherwise need to introduce a PHI node
635 /// for it.
636 bool canUncountableExitConditionLoadBeMoved(BasicBlock *ExitingBlock);
637
638 /// Return true if all of the instructions in the block can be speculatively
639 /// executed, and record the loads/stores that require masking.
640 /// \p SafePtrs is a list of addresses that are known to be legal and we know
641 /// that we can read from them without segfault.
642 /// \p MaskedOp is a list of instructions that have to be transformed into
643 /// calls to the appropriate masked intrinsic when the loop is vectorized
644 /// or dropped if the instruction is a conditional assume intrinsic.
645 bool
646 blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
648
649 /// Updates the vectorization state by adding \p Phi to the inductions list.
650 /// This can set \p Phi as the main induction of the loop if \p Phi is a
651 /// better choice for the main induction than the existing one.
652 void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID);
653
654 /// The loop that we evaluate.
655 Loop *TheLoop;
656
657 /// Loop Info analysis.
658 LoopInfo *LI;
659
660 /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
661 /// Applies dynamic knowledge to simplify SCEV expressions in the context
662 /// of existing SCEV assumptions. The analysis will also add a minimal set
663 /// of new predicates if this is required to enable vectorization and
664 /// unrolling.
666
667 /// Target Transform Info.
669
670 /// Target Library Info.
672
673 /// Dominator Tree.
674 DominatorTree *DT;
675
676 // LoopAccess analysis.
678
679 const LoopAccessInfo *LAI = nullptr;
680
681 /// Interface to emit optimization remarks.
683
684 // --- vectorization state --- //
685
686 /// Holds the primary induction variable. This is the counter of the
687 /// loop.
688 PHINode *PrimaryInduction = nullptr;
689
690 /// Holds the reduction variables.
692
693 /// Holds all of the induction variables that we found in the loop.
694 /// Notice that inductions don't need to start at zero and that induction
695 /// variables can be pointers.
696 InductionList Inductions;
697
698 /// Holds all the casts that participate in the update chain of the induction
699 /// variables, and that have been proven to be redundant (possibly under a
700 /// runtime guard). These casts can be ignored when creating the vectorized
701 /// loop body.
702 SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
703
704 /// Holds the phi nodes that are fixed-order recurrences.
705 RecurrenceSet FixedOrderRecurrences;
706
707 /// Holds the widest induction type encountered.
708 IntegerType *WidestIndTy = nullptr;
709
710 /// Vectorization requirements that will go through late-evaluation.
711 LoopVectorizationRequirements *Requirements;
712
713 /// Used to emit an analysis of any legality issues.
714 LoopVectorizeHints *Hints;
715
716 /// The demanded bits analysis is used to compute the minimum type size in
717 /// which a reduction can be computed.
718 DemandedBits *DB;
719
720 /// The assumption cache analysis is used to compute the minimum type size in
721 /// which a reduction can be computed.
722 AssumptionCache *AC;
723
724 /// Instructions that require masking because they are in source-level
725 /// conditionally executed blocks.
726 SmallPtrSet<const Instruction *, 8> ConditionallyExecutedOps;
727 /// Instructions that require masking only due to tail-folding predication.
728 SmallPtrSet<const Instruction *, 8> TailFoldedMaskedOp;
729
730 /// Contains all identified histogram operations, which are sequences of
731 /// load -> update -> store instructions where multiple lanes in a vector
732 /// may work on the same memory location.
734
735 /// Whether or not creating SCEV predicates is allowed.
736 bool AllowRuntimeSCEVChecks;
737
738 // Alias Analysis results used to check for possible aliasing with loads
739 // used in uncountable exit conditions.
740 AAResults *AA;
741
742 /// If we discover function calls within the loop which have a valid
743 /// vectorized variant, record that fact so that LoopVectorize can
744 /// (potentially) make a better decision on the maximum VF and enable
745 /// the use of those function variants.
746 bool VecCallVariantsFound = false;
747
748 /// Keep track of all the countable and uncountable exiting blocks if
749 /// the exact backedge taken count is not computable.
750 SmallVector<BasicBlock *, 4> CountableExitingBlocks;
751
752 /// Records whether we have an uncountable early exit in a loop that's
753 /// either read-only or read-write.
755};
756
757} // namespace llvm
758
759#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define LLVM_ABI
Definition Compiler.h:215
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
This file implements a map that provides insertion order iteration.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
A struct for saving information about induction variables.
Class to represent integer types.
An instruction for reading from memory.
Drive the analysis of memory accesses in the loop.
MapVector< PHINode *, InductionDescriptor > InductionList
InductionList saves induction variables and maps them to the induction descriptor.
LLVM_ABI bool isInvariantStoreOfReduction(StoreInst *SI)
Returns True if given store is a final invariant store of one of the reductions found in the loop.
bool hasVectorCallVariants() const
Returns true if there is at least one function call in the loop which has a vectorized variant availa...
LLVM_ABI void collectUnitStridePredicates() const
Add unit stride predicates for memory accesses to PSE, if runtime checks are allowed and an inner loo...
const RecurrenceDescriptor & getRecurrenceDescriptor(PHINode *PN) const
Returns the recurrence descriptor associated with a given phi node PN, expecting one to exist.
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
uint64_t getMaxStoreLoadForwardSafeDistanceInBits() const
Return safe power-of-2 number of elements, which do not prevent store-load forwarding and safe to ope...
LLVM_ABI bool isInvariantAddressOfReduction(Value *V)
Returns True if given address is invariant and is used to store recurrent expression.
LLVM_ABI bool canVectorize(bool UseVPlanNativePath)
Returns true if it is legal to vectorize this loop.
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB) const
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
LLVM_ABI int isConsecutivePtr(Type *AccessTy, Value *Ptr) const
Check if this pointer is consecutive when vectorizing.
AssumptionCache * getAssumptionCache() const
std::optional< const HistogramInfo * > getHistogramInfo(Instruction *I) const
Returns a HistogramInfo* for the given instruction if it was determined to be part of a load -> updat...
SmallPtrSet< const PHINode *, 8 > RecurrenceSet
RecurrenceSet contains the phi nodes that are recurrences other than inductions and reductions.
bool hasUncountableExitWithSideEffects() const
Returns true if this is an early exit loop with state-changing or potentially-faulting operations and...
LLVM_ABI bool canVectorizeFPMath(bool EnableStrictReductions)
Returns true if it is legal to vectorize the FP math operations in this loop.
bool isReductionVariable(PHINode *PN) const
Returns True if PN is a reduction variable in this loop.
LLVM_ABI bool isFixedOrderRecurrence(const PHINode *Phi) const
Returns True if Phi is a fixed-order recurrence in this loop.
IntegerType * getWidestInductionType()
Returns the widest induction type.
LLVM_ABI bool isInductionPhi(const Value *V) const
Returns True if V is a Phi node of an induction variable in this loop.
PHINode * getPrimaryInduction()
Returns the primary induction variable.
UncountableExitTrait getUncountableExitTrait() const
Returns information about whether this loop contains at least one uncountable early exit,...
const SmallVector< BasicBlock *, 4 > & getCountableExitingBlocks() const
Returns all exiting blocks with a countable exit, i.e.
const InductionList & getInductionVars() const
Returns the induction variables found in the loop.
LLVM_ABI bool isInvariant(Value *V) const
Returns true if V is invariant across all loop iterations according to SCEV.
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
bool isSafeForAnyStoreLoadForwardDistances() const
Return true if there is store-load forwarding dependencies.
LLVM_ABI bool canFoldTailByMasking() const
Return true if we can vectorize this loop while folding its tail by masking.
LLVM_ABI void prepareToFoldTailByMasking()
Mark all respective loads/stores for masking.
bool hasUncountableEarlyExit() const
Returns true if the loop has uncountable early exits, i.e.
LLVM_ABI bool isUniformMemOp(Instruction &I, std::optional< ElementCount > VF) const
A uniform memory op is a load or store which accesses the same memory location on all VF lanes,...
bool hasHistograms() const
Returns a list of all known histogram operations in the loop.
const LoopAccessInfo * getLAI() const
MapVector< PHINode *, RecurrenceDescriptor > ReductionList
ReductionList contains the reduction descriptors for all of the reductions that were found in the loo...
ScalarEvolution * getScalarEvolution() const
bool isMaskRequired(const Instruction *I, bool TailFolded) const
Returns true if instruction I requires a mask for vectorization.
LLVM_ABI bool isUniform(Value *V, std::optional< ElementCount > VF) const
Returns true if value V is uniform across VF lanes, when VF is provided, and otherwise if V is invari...
LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F, LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC, bool AllowRuntimeSCEVChecks, AAResults *AA)
const RuntimePointerChecking * getRuntimePointerChecking() const
Returns the information that we collected about runtime memory check.
LLVM_ABI bool isInductionVariable(const Value *V) const
Returns True if V can be considered as an induction variable in this loop.
LLVM_ABI bool isCastedInductionVariable(const Value *V) const
Returns True if V is a cast that is part of an induction def-use chain, and had been proven to be red...
This holds vectorization requirements that must be verified late in the process.
void addExactFPMathInst(Instruction *I)
Track the 1st floating-point instruction that can not be reassociated.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
@ SK_PreferScalable
Vectorize loops using scalable vectors or fixed-width vectors, but favor scalable vectors when the co...
@ SK_AlwaysScalable
Always vectorize loops using scalable vectors if feasible (i.e.
@ SK_FixedWidthOnly
Disables vectorization with scalable vectors.
LLVM_ABI bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
LLVM_ABI bool allowReordering() const
When enabling loop hints are provided we allow the vectorizer to change the order of operations that ...
LLVM_ABI void emitRemarkWithHints() const
Dumps all the hint information.
LLVM_ABI void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LLVM_ABI LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE, const TargetTransformInfo *TTI=nullptr)
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
The main scalar evolution driver.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM Value Representation.
Definition Value.h:75
Abstract Attribute helper functions.
Definition Attributor.h:165
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
UncountableExitTrait
Indicates the characteristics of a loop with an uncountable exit.
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Value
Definition InstrProf.h:143
LLVM_ABI bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
LLVM_ABI TransformationMode hasUnrollTransformation(const Loop *L)
TargetTransformInfo TTI
@ TM_Disable
The transformation should not be applied.
Definition LoopUtils.h:292
This holds details about a histogram operation – a load -> update -> store sequence where each lane i...
HistogramInfo(LoadInst *Load, Instruction *Update, StoreInst *Store)