Line data Source code
1 : //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : /// \file
11 : /// This file defines the LoopVectorizationLegality class. Original code
12 : /// in Loop Vectorizer has been moved out to its own file for modularity
13 : /// and reusability.
14 : ///
15 : /// Currently, it works for innermost loop vectorization. Extending this to
16 : /// outer loop vectorization is a TODO item.
17 : ///
18 : /// Also provides:
19 : /// 1) LoopVectorizeHints class which keeps a number of loop annotations
20 : /// locally for easy look up. It has the ability to write them back as
21 : /// loop metadata, upon request.
22 : /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
23 : /// of reporting useful failure to vectorize message.
24 : //
25 : //===----------------------------------------------------------------------===//
26 :
27 : #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 : #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
29 :
30 : #include "llvm/ADT/MapVector.h"
31 : #include "llvm/Analysis/LoopAccessAnalysis.h"
32 : #include "llvm/Analysis/OptimizationRemarkEmitter.h"
33 : #include "llvm/Transforms/Utils/LoopUtils.h"
34 :
35 : namespace llvm {
36 :
37 : /// Create an analysis remark that explains why vectorization failed
38 : ///
39 : /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
40 : /// RemarkName is the identifier for the remark. If \p I is passed it is an
41 : /// instruction that prevents vectorization. Otherwise \p TheLoop is used for
42 : /// the location of the remark. \return the remark object that can be
43 : /// streamed to.
44 : OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
45 : StringRef RemarkName,
46 : Loop *TheLoop,
47 : Instruction *I = nullptr);
48 :
49 : /// Utility class for getting and setting loop vectorizer hints in the form
50 : /// of loop metadata.
51 : /// This class keeps a number of loop annotations locally (as member variables)
52 : /// and can, upon request, write them back as metadata on the loop. It will
53 : /// initially scan the loop for existing metadata, and will update the local
54 : /// values based on information in the loop.
55 : /// We cannot write all values to metadata, as the mere presence of some info,
56 : /// for example 'force', means a decision has been made. So, we need to be
57 : /// careful NOT to add them if the user hasn't specifically asked so.
58 : class LoopVectorizeHints {
59 : enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
60 :
61 : /// Hint - associates name and validation with the hint value.
62 : struct Hint {
63 : const char *Name;
64 : unsigned Value; // This may have to change for non-numeric values.
65 : HintKind Kind;
66 :
67 : Hint(const char *Name, unsigned Value, HintKind Kind)
68 9662 : : Name(Name), Value(Value), Kind(Kind) {}
69 :
70 : bool validate(unsigned Val);
71 : };
72 :
73 : /// Vectorization width.
74 : Hint Width;
75 :
76 : /// Vectorization interleave factor.
77 : Hint Interleave;
78 :
79 : /// Vectorization forced
80 : Hint Force;
81 :
82 : /// Already Vectorized
83 : Hint IsVectorized;
84 :
85 : /// Return the loop metadata prefix.
86 : static StringRef Prefix() { return "llvm.loop."; }
87 :
88 : /// True if there is any unsafe math in the loop.
89 : bool PotentiallyUnsafe = false;
90 :
91 : public:
92 : enum ForceKind {
93 : FK_Undefined = -1, ///< Not selected.
94 : FK_Disabled = 0, ///< Forcing disabled.
95 : FK_Enabled = 1, ///< Forcing enabled.
96 : };
97 :
98 : LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
99 : OptimizationRemarkEmitter &ORE);
100 :
101 : /// Mark the loop L as already vectorized by setting the width to 1.
102 : void setAlreadyVectorized() {
103 1816 : IsVectorized.Value = 1;
104 1816 : Hint Hints[] = {IsVectorized};
105 1816 : writeHintsToMetadata(Hints);
106 : }
107 :
108 : bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const;
109 :
110 : /// Dumps all the hint information.
111 : void emitRemarkWithHints() const;
112 :
113 0 : unsigned getWidth() const { return Width.Value; }
114 0 : unsigned getInterleave() const { return Interleave.Value; }
115 0 : unsigned getIsVectorized() const { return IsVectorized.Value; }
116 0 : enum ForceKind getForce() const { return (ForceKind)Force.Value; }
117 :
118 : /// If hints are provided that force vectorization, use the AlwaysPrint
119 : /// pass name to force the frontend to print the diagnostic.
120 : const char *vectorizeAnalysisPassName() const;
121 :
122 : bool allowReordering() const {
123 : // When enabling loop hints are provided we allow the vectorizer to change
124 : // the order of operations that is given by the scalar loop. This is not
125 : // enabled by default because can be unsafe or inefficient. For example,
126 : // reordering floating-point operations will change the way round-off
127 : // error accumulates in the loop.
128 20 : return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
129 : }
130 :
131 : bool isPotentiallyUnsafe() const {
132 : // Avoid FP vectorization if the target is unsure about proper support.
133 : // This may be related to the SIMD unit in the target not handling
134 : // IEEE 754 FP ops properly, or bad single-to-double promotions.
135 : // Otherwise, a sequence of vectorized loops, even without reduction,
136 : // could lead to different end results on the destination vectors.
137 987 : return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
138 : }
139 :
140 194 : void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
141 :
142 : private:
143 : /// Find hints specified in the loop metadata and update local values.
144 : void getHintsFromMetadata();
145 :
146 : /// Checks string hint with one operand and set value if valid.
147 : void setHint(StringRef Name, Metadata *Arg);
148 :
149 : /// Create a new hint from name / value pair.
150 : MDNode *createHintMetadata(StringRef Name, unsigned V) const;
151 :
152 : /// Matches metadata with hint name.
153 : bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes);
154 :
155 : /// Sets current hints into loop metadata, keeping other values intact.
156 : void writeHintsToMetadata(ArrayRef<Hint> HintTypes);
157 :
158 : /// The loop these hints belong to.
159 : const Loop *TheLoop;
160 :
161 : /// Interface to emit optimization remarks.
162 : OptimizationRemarkEmitter &ORE;
163 : };
164 :
165 : /// This holds vectorization requirements that must be verified late in
166 : /// the process. The requirements are set by legalize and costmodel. Once
167 : /// vectorization has been determined to be possible and profitable the
168 : /// requirements can be verified by looking for metadata or compiler options.
169 : /// For example, some loops require FP commutativity which is only allowed if
170 : /// vectorization is explicitly specified or if the fast-math compiler option
171 : /// has been provided.
172 : /// Late evaluation of these requirements allows helpful diagnostics to be
173 : /// composed that tells the user what need to be done to vectorize the loop. For
174 : /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
175 : /// evaluation should be used only when diagnostics can generated that can be
176 : /// followed by a non-expert user.
177 : class LoopVectorizationRequirements {
178 : public:
179 3827 : LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE) : ORE(ORE) {}
180 :
181 0 : void addUnsafeAlgebraInst(Instruction *I) {
182 : // First unsafe algebra instruction.
183 11 : if (!UnsafeAlgebraInst)
184 11 : UnsafeAlgebraInst = I;
185 0 : }
186 :
187 992 : void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
188 :
189 : bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
190 :
191 : private:
192 : unsigned NumRuntimePointerChecks = 0;
193 : Instruction *UnsafeAlgebraInst = nullptr;
194 :
195 : /// Interface to emit optimization remarks.
196 : OptimizationRemarkEmitter &ORE;
197 : };
198 :
199 : /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
200 : /// to what vectorization factor.
201 : /// This class does not look at the profitability of vectorization, only the
202 : /// legality. This class has two main kinds of checks:
203 : /// * Memory checks - The code in canVectorizeMemory checks if vectorization
204 : /// will change the order of memory accesses in a way that will change the
205 : /// correctness of the program.
206 : /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
207 : /// checks for a number of different conditions, such as the availability of a
208 : /// single induction variable, that all types are supported and vectorize-able,
209 : /// etc. This code reflects the capabilities of InnerLoopVectorizer.
210 : /// This class is also used by InnerLoopVectorizer for identifying
211 : /// induction variable and the different reduction variables.
212 : class LoopVectorizationLegality {
213 : public:
214 3827 : LoopVectorizationLegality(
215 : Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
216 : TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
217 : std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
218 : OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
219 : LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
220 3827 : : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
221 7654 : ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
222 :
223 : /// ReductionList contains the reduction descriptors for all
224 : /// of the reductions that were found in the loop.
225 : using ReductionList = DenseMap<PHINode *, RecurrenceDescriptor>;
226 :
227 : /// InductionList saves induction variables and maps them to the
228 : /// induction descriptor.
229 : using InductionList = MapVector<PHINode *, InductionDescriptor>;
230 :
231 : /// RecurrenceSet contains the phi nodes that are recurrences other than
232 : /// inductions and reductions.
233 : using RecurrenceSet = SmallPtrSet<const PHINode *, 8>;
234 :
235 : /// Returns true if it is legal to vectorize this loop.
236 : /// This does not mean that it is profitable to vectorize this
237 : /// loop, only that it is legal to do so.
238 : /// Temporarily taking UseVPlanNativePath parameter. If true, take
239 : /// the new code path being implemented for outer loop vectorization
240 : /// (should be functional for inner loop vectorization) based on VPlan.
241 : /// If false, good old LV code.
242 : bool canVectorize(bool UseVPlanNativePath);
243 :
244 : /// Return true if we can vectorize this loop while folding its tail by
245 : /// masking.
246 : bool canFoldTailByMasking();
247 :
248 : /// Returns the primary induction variable.
249 0 : PHINode *getPrimaryInduction() { return PrimaryInduction; }
250 :
251 : /// Returns the reduction variables found in the loop.
252 1455 : ReductionList *getReductionVars() { return &Reductions; }
253 :
254 : /// Returns the induction variables found in the loop.
255 3474 : InductionList *getInductionVars() { return &Inductions; }
256 :
257 : /// Return the first-order recurrences found in the loop.
258 : RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
259 :
260 : /// Return the set of instructions to sink to handle first-order recurrences.
261 1350 : DenseMap<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
262 :
263 : /// Returns the widest induction type.
264 0 : Type *getWidestInductionType() { return WidestIndTy; }
265 :
266 : /// Returns True if V is a Phi node of an induction variable in this loop.
267 : bool isInductionPhi(const Value *V);
268 :
269 : /// Returns True if V is a cast that is part of an induction def-use chain,
270 : /// and had been proven to be redundant under a runtime guard (in other
271 : /// words, the cast has the same SCEV expression as the induction phi).
272 : bool isCastedInductionVariable(const Value *V);
273 :
274 : /// Returns True if V can be considered as an induction variable in this
275 : /// loop. V can be the induction phi, or some redundant cast in the def-use
276 : /// chain of the inducion phi.
277 : bool isInductionVariable(const Value *V);
278 :
279 : /// Returns True if PN is a reduction variable in this loop.
280 7064 : bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
281 :
282 : /// Returns True if Phi is a first-order recurrence in this loop.
283 : bool isFirstOrderRecurrence(const PHINode *Phi);
284 :
285 : /// Return true if the block BB needs to be predicated in order for the loop
286 : /// to be vectorized.
287 : bool blockNeedsPredication(BasicBlock *BB);
288 :
289 : /// Check if this pointer is consecutive when vectorizing. This happens
290 : /// when the last index of the GEP is the induction variable, or that the
291 : /// pointer itself is an induction variable.
292 : /// This check allows us to vectorize A[idx] into a wide load/store.
293 : /// Returns:
294 : /// 0 - Stride is unknown or non-consecutive.
295 : /// 1 - Address is consecutive.
296 : /// -1 - Address is consecutive, and decreasing.
297 : /// NOTE: This method must only be used before modifying the original scalar
298 : /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
299 : int isConsecutivePtr(Value *Ptr);
300 :
301 : /// Returns true if the value V is uniform within the loop.
302 : bool isUniform(Value *V);
303 :
304 : /// Returns the information that we collected about runtime memory check.
305 0 : const RuntimePointerChecking *getRuntimePointerChecking() const {
306 0 : return LAI->getRuntimePointerChecking();
307 : }
308 :
309 0 : const LoopAccessInfo *getLAI() const { return LAI; }
310 :
311 1837 : unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
312 :
313 0 : uint64_t getMaxSafeRegisterWidth() const {
314 983 : return LAI->getDepChecker().getMaxSafeRegisterWidth();
315 : }
316 :
317 0 : bool hasStride(Value *V) { return LAI->hasStride(V); }
318 :
319 : /// Returns true if vector representation of the instruction \p I
320 : /// requires mask.
321 5562 : bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
322 :
323 601 : unsigned getNumStores() const { return LAI->getNumStores(); }
324 601 : unsigned getNumLoads() const { return LAI->getNumLoads(); }
325 :
326 : // Returns true if the NoNaN attribute is set on the function.
327 0 : bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
328 :
329 : private:
330 : /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
331 : /// its nested loops are considered legal for vectorization. These legal
332 : /// checks are common for inner and outer loop vectorization.
333 : /// Temporarily taking UseVPlanNativePath parameter. If true, take
334 : /// the new code path being implemented for outer loop vectorization
335 : /// (should be functional for inner loop vectorization) based on VPlan.
336 : /// If false, good old LV code.
337 : bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
338 :
339 : /// Set up outer loop inductions by checking Phis in outer loop header for
340 : /// supported inductions (int inductions). Return false if any of these Phis
341 : /// is not a supported induction or if we fail to find an induction.
342 : bool setupOuterLoopInductions();
343 :
344 : /// Return true if the pre-header, exiting and latch blocks of \p Lp
345 : /// (non-recursive) are considered legal for vectorization.
346 : /// Temporarily taking UseVPlanNativePath parameter. If true, take
347 : /// the new code path being implemented for outer loop vectorization
348 : /// (should be functional for inner loop vectorization) based on VPlan.
349 : /// If false, good old LV code.
350 : bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
351 :
352 : /// Check if a single basic block loop is vectorizable.
353 : /// At this point we know that this is a loop with a constant trip count
354 : /// and we only need to check individual instructions.
355 : bool canVectorizeInstrs();
356 :
357 : /// When we vectorize loops we may change the order in which
358 : /// we read and write from memory. This method checks if it is
359 : /// legal to vectorize the code, considering only memory constrains.
360 : /// Returns true if the loop is vectorizable
361 : bool canVectorizeMemory();
362 :
363 : /// Return true if we can vectorize this loop using the IF-conversion
364 : /// transformation.
365 : bool canVectorizeWithIfConvert();
366 :
367 : /// Return true if we can vectorize this outer loop. The method performs
368 : /// specific checks for outer loop vectorization.
369 : bool canVectorizeOuterLoop();
370 :
371 : /// Return true if all of the instructions in the block can be speculatively
372 : /// executed. \p SafePtrs is a list of addresses that are known to be legal
373 : /// and we know that we can read from them without segfault.
374 : bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
375 :
376 : /// Updates the vectorization state by adding \p Phi to the inductions list.
377 : /// This can set \p Phi as the main induction of the loop if \p Phi is a
378 : /// better choice for the main induction than the existing one.
379 : void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
380 : SmallPtrSetImpl<Value *> &AllowedExit);
381 :
382 : /// Create an analysis remark that explains why vectorization failed
383 : ///
384 : /// \p RemarkName is the identifier for the remark. If \p I is passed it is
385 : /// an instruction that prevents vectorization. Otherwise the loop is used
386 : /// for the location of the remark. \return the remark object that can be
387 : /// streamed to.
388 : OptimizationRemarkAnalysis
389 0 : createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
390 0 : return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
391 2498 : RemarkName, TheLoop, I);
392 : }
393 :
394 : /// If an access has a symbolic strides, this maps the pointer value to
395 : /// the stride symbol.
396 0 : const ValueToValueMap *getSymbolicStrides() {
397 : // FIXME: Currently, the set of symbolic strides is sometimes queried before
398 : // it's collected. This happens from canVectorizeWithIfConvert, when the
399 : // pointer is checked to reference consecutive elements suitable for a
400 : // masked access.
401 8648 : return LAI ? &LAI->getSymbolicStrides() : nullptr;
402 : }
403 :
404 : /// The loop that we evaluate.
405 : Loop *TheLoop;
406 :
407 : /// Loop Info analysis.
408 : LoopInfo *LI;
409 :
410 : /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
411 : /// Applies dynamic knowledge to simplify SCEV expressions in the context
412 : /// of existing SCEV assumptions. The analysis will also add a minimal set
413 : /// of new predicates if this is required to enable vectorization and
414 : /// unrolling.
415 : PredicatedScalarEvolution &PSE;
416 :
417 : /// Target Library Info.
418 : TargetLibraryInfo *TLI;
419 :
420 : /// Dominator Tree.
421 : DominatorTree *DT;
422 :
423 : // LoopAccess analysis.
424 : std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
425 :
426 : // And the loop-accesses info corresponding to this loop. This pointer is
427 : // null until canVectorizeMemory sets it up.
428 : const LoopAccessInfo *LAI = nullptr;
429 :
430 : /// Interface to emit optimization remarks.
431 : OptimizationRemarkEmitter *ORE;
432 :
433 : // --- vectorization state --- //
434 :
435 : /// Holds the primary induction variable. This is the counter of the
436 : /// loop.
437 : PHINode *PrimaryInduction = nullptr;
438 :
439 : /// Holds the reduction variables.
440 : ReductionList Reductions;
441 :
442 : /// Holds all of the induction variables that we found in the loop.
443 : /// Notice that inductions don't need to start at zero and that induction
444 : /// variables can be pointers.
445 : InductionList Inductions;
446 :
447 : /// Holds all the casts that participate in the update chain of the induction
448 : /// variables, and that have been proven to be redundant (possibly under a
449 : /// runtime guard). These casts can be ignored when creating the vectorized
450 : /// loop body.
451 : SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
452 :
453 : /// Holds the phi nodes that are first-order recurrences.
454 : RecurrenceSet FirstOrderRecurrences;
455 :
456 : /// Holds instructions that need to sink past other instructions to handle
457 : /// first-order recurrences.
458 : DenseMap<Instruction *, Instruction *> SinkAfter;
459 :
460 : /// Holds the widest induction type encountered.
461 : Type *WidestIndTy = nullptr;
462 :
463 : /// Allowed outside users. This holds the induction and reduction
464 : /// vars which can be accessed from outside the loop.
465 : SmallPtrSet<Value *, 4> AllowedExit;
466 :
467 : /// Can we assume the absence of NaNs.
468 : bool HasFunNoNaNAttr = false;
469 :
470 : /// Vectorization requirements that will go through late-evaluation.
471 : LoopVectorizationRequirements *Requirements;
472 :
473 : /// Used to emit an analysis of any legality issues.
474 : LoopVectorizeHints *Hints;
475 :
476 : /// The demanded bits analsyis is used to compute the minimum type size in
477 : /// which a reduction can be computed.
478 : DemandedBits *DB;
479 :
480 : /// The assumption cache analysis is used to compute the minimum type size in
481 : /// which a reduction can be computed.
482 : AssumptionCache *AC;
483 :
484 : /// While vectorizing these instructions we have to generate a
485 : /// call to the appropriate masked intrinsic
486 : SmallPtrSet<const Instruction *, 8> MaskedOp;
487 : };
488 :
489 : } // namespace llvm
490 :
491 : #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
|