LLVM  9.0.0svn
LoopVectorizationLegality.h
Go to the documentation of this file.
1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines the LoopVectorizationLegality class. Original code
11 /// in Loop Vectorizer has been moved out to its own file for modularity
12 /// and reusability.
13 ///
14 /// Currently, it works for innermost loop vectorization. Extending this to
15 /// outer loop vectorization is a TODO item.
16 ///
17 /// Also provides:
18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
19 /// locally for easy look up. It has the ability to write them back as
20 /// loop metadata, upon request.
21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22 /// of reporting useful failure to vectorize message.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 
29 #include "llvm/ADT/MapVector.h"
33 
34 namespace llvm {
35 
36 /// Create an analysis remark that explains why vectorization failed
37 ///
38 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
39 /// RemarkName is the identifier for the remark. If \p I is passed it is an
40 /// instruction that prevents vectorization. Otherwise \p TheLoop is used for
41 /// the location of the remark. \return the remark object that can be
42 /// streamed to.
43 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
44  StringRef RemarkName,
45  Loop *TheLoop,
46  Instruction *I = nullptr);
47 
48 /// Utility class for getting and setting loop vectorizer hints in the form
49 /// of loop metadata.
50 /// This class keeps a number of loop annotations locally (as member variables)
51 /// and can, upon request, write them back as metadata on the loop. It will
52 /// initially scan the loop for existing metadata, and will update the local
53 /// values based on information in the loop.
54 /// We cannot write all values to metadata, as the mere presence of some info,
55 /// for example 'force', means a decision has been made. So, we need to be
56 /// careful NOT to add them if the user hasn't specifically asked so.
58  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
59 
60  /// Hint - associates name and validation with the hint value.
61  struct Hint {
62  const char *Name;
63  unsigned Value; // This may have to change for non-numeric values.
64  HintKind Kind;
65 
66  Hint(const char *Name, unsigned Value, HintKind Kind)
67  : Name(Name), Value(Value), Kind(Kind) {}
68 
69  bool validate(unsigned Val);
70  };
71 
72  /// Vectorization width.
73  Hint Width;
74 
75  /// Vectorization interleave factor.
76  Hint Interleave;
77 
78  /// Vectorization forced
79  Hint Force;
80 
81  /// Already Vectorized
82  Hint IsVectorized;
83 
84  /// Return the loop metadata prefix.
85  static StringRef Prefix() { return "llvm.loop."; }
86 
87  /// True if there is any unsafe math in the loop.
88  bool PotentiallyUnsafe = false;
89 
90 public:
91  enum ForceKind {
92  FK_Undefined = -1, ///< Not selected.
93  FK_Disabled = 0, ///< Forcing disabled.
94  FK_Enabled = 1, ///< Forcing enabled.
95  };
96 
97  LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
99 
100  /// Mark the loop L as already vectorized by setting the width to 1.
101  void setAlreadyVectorized();
102 
103  bool allowVectorization(Function *F, Loop *L,
104  bool VectorizeOnlyWhenForced) const;
105 
106  /// Dumps all the hint information.
107  void emitRemarkWithHints() const;
108 
109  unsigned getWidth() const { return Width.Value; }
110  unsigned getInterleave() const { return Interleave.Value; }
111  unsigned getIsVectorized() const { return IsVectorized.Value; }
112  enum ForceKind getForce() const {
113  if ((ForceKind)Force.Value == FK_Undefined &&
115  return FK_Disabled;
116  return (ForceKind)Force.Value;
117  }
118 
119  /// If hints are provided that force vectorization, use the AlwaysPrint
120  /// pass name to force the frontend to print the diagnostic.
121  const char *vectorizeAnalysisPassName() const;
122 
123  bool allowReordering() const {
124  // When enabling loop hints are provided we allow the vectorizer to change
125  // the order of operations that is given by the scalar loop. This is not
126  // enabled by default because can be unsafe or inefficient. For example,
127  // reordering floating-point operations will change the way round-off
128  // error accumulates in the loop.
129  return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
130  }
131 
132  bool isPotentiallyUnsafe() const {
133  // Avoid FP vectorization if the target is unsure about proper support.
134  // This may be related to the SIMD unit in the target not handling
135  // IEEE 754 FP ops properly, or bad single-to-double promotions.
136  // Otherwise, a sequence of vectorized loops, even without reduction,
137  // could lead to different end results on the destination vectors.
138  return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
139  }
140 
141  void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
142 
143 private:
144  /// Find hints specified in the loop metadata and update local values.
145  void getHintsFromMetadata();
146 
147  /// Checks string hint with one operand and set value if valid.
148  void setHint(StringRef Name, Metadata *Arg);
149 
150  /// The loop these hints belong to.
151  const Loop *TheLoop;
152 
153  /// Interface to emit optimization remarks.
155 };
156 
157 /// This holds vectorization requirements that must be verified late in
158 /// the process. The requirements are set by legalize and costmodel. Once
159 /// vectorization has been determined to be possible and profitable the
160 /// requirements can be verified by looking for metadata or compiler options.
161 /// For example, some loops require FP commutativity which is only allowed if
162 /// vectorization is explicitly specified or if the fast-math compiler option
163 /// has been provided.
164 /// Late evaluation of these requirements allows helpful diagnostics to be
165 /// composed that tells the user what need to be done to vectorize the loop. For
166 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
167 /// evaluation should be used only when diagnostics can generated that can be
168 /// followed by a non-expert user.
170 public:
172 
174  // First unsafe algebra instruction.
175  if (!UnsafeAlgebraInst)
176  UnsafeAlgebraInst = I;
177  }
178 
179  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
180 
181  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
182 
183 private:
184  unsigned NumRuntimePointerChecks = 0;
185  Instruction *UnsafeAlgebraInst = nullptr;
186 
187  /// Interface to emit optimization remarks.
189 };
190 
191 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
192 /// to what vectorization factor.
193 /// This class does not look at the profitability of vectorization, only the
194 /// legality. This class has two main kinds of checks:
195 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
196 /// will change the order of memory accesses in a way that will change the
197 /// correctness of the program.
198 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
199 /// checks for a number of different conditions, such as the availability of a
200 /// single induction variable, that all types are supported and vectorize-able,
201 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
202 /// This class is also used by InnerLoopVectorizer for identifying
203 /// induction variable and the different reduction variables.
205 public:
209  std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
212  : TheLoop(L), LI(LI), PSE(PSE), TLI(TLI), DT(DT), GetLAA(GetLAA),
213  ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
214 
215  /// ReductionList contains the reduction descriptors for all
216  /// of the reductions that were found in the loop.
218 
219  /// InductionList saves induction variables and maps them to the
220  /// induction descriptor.
222 
223  /// RecurrenceSet contains the phi nodes that are recurrences other than
224  /// inductions and reductions.
226 
227  /// Returns true if it is legal to vectorize this loop.
228  /// This does not mean that it is profitable to vectorize this
229  /// loop, only that it is legal to do so.
230  /// Temporarily taking UseVPlanNativePath parameter. If true, take
231  /// the new code path being implemented for outer loop vectorization
232  /// (should be functional for inner loop vectorization) based on VPlan.
233  /// If false, good old LV code.
234  bool canVectorize(bool UseVPlanNativePath);
235 
236  /// Return true if we can vectorize this loop while folding its tail by
237  /// masking.
238  bool canFoldTailByMasking();
239 
240  /// Returns the primary induction variable.
241  PHINode *getPrimaryInduction() { return PrimaryInduction; }
242 
243  /// Returns the reduction variables found in the loop.
244  ReductionList *getReductionVars() { return &Reductions; }
245 
246  /// Returns the induction variables found in the loop.
247  InductionList *getInductionVars() { return &Inductions; }
248 
249  /// Return the first-order recurrences found in the loop.
250  RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
251 
252  /// Return the set of instructions to sink to handle first-order recurrences.
254 
255  /// Returns the widest induction type.
256  Type *getWidestInductionType() { return WidestIndTy; }
257 
258  /// Returns True if V is a Phi node of an induction variable in this loop.
259  bool isInductionPhi(const Value *V);
260 
261  /// Returns True if V is a cast that is part of an induction def-use chain,
262  /// and had been proven to be redundant under a runtime guard (in other
263  /// words, the cast has the same SCEV expression as the induction phi).
264  bool isCastedInductionVariable(const Value *V);
265 
266  /// Returns True if V can be considered as an induction variable in this
267  /// loop. V can be the induction phi, or some redundant cast in the def-use
268  /// chain of the inducion phi.
269  bool isInductionVariable(const Value *V);
270 
271  /// Returns True if PN is a reduction variable in this loop.
272  bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
273 
274  /// Returns True if Phi is a first-order recurrence in this loop.
275  bool isFirstOrderRecurrence(const PHINode *Phi);
276 
277  /// Return true if the block BB needs to be predicated in order for the loop
278  /// to be vectorized.
279  bool blockNeedsPredication(BasicBlock *BB);
280 
281  /// Check if this pointer is consecutive when vectorizing. This happens
282  /// when the last index of the GEP is the induction variable, or that the
283  /// pointer itself is an induction variable.
284  /// This check allows us to vectorize A[idx] into a wide load/store.
285  /// Returns:
286  /// 0 - Stride is unknown or non-consecutive.
287  /// 1 - Address is consecutive.
288  /// -1 - Address is consecutive, and decreasing.
289  /// NOTE: This method must only be used before modifying the original scalar
290  /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
291  int isConsecutivePtr(Value *Ptr);
292 
293  /// Returns true if the value V is uniform within the loop.
294  bool isUniform(Value *V);
295 
296  /// Returns the information that we collected about runtime memory check.
298  return LAI->getRuntimePointerChecking();
299  }
300 
301  const LoopAccessInfo *getLAI() const { return LAI; }
302 
303  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
304 
305  uint64_t getMaxSafeRegisterWidth() const {
306  return LAI->getDepChecker().getMaxSafeRegisterWidth();
307  }
308 
309  bool hasStride(Value *V) { return LAI->hasStride(V); }
310 
311  /// Returns true if vector representation of the instruction \p I
312  /// requires mask.
313  bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
314 
315  unsigned getNumStores() const { return LAI->getNumStores(); }
316  unsigned getNumLoads() const { return LAI->getNumLoads(); }
317 
318  // Returns true if the NoNaN attribute is set on the function.
319  bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
320 
321 private:
322  /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
323  /// its nested loops are considered legal for vectorization. These legal
324  /// checks are common for inner and outer loop vectorization.
325  /// Temporarily taking UseVPlanNativePath parameter. If true, take
326  /// the new code path being implemented for outer loop vectorization
327  /// (should be functional for inner loop vectorization) based on VPlan.
328  /// If false, good old LV code.
329  bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
330 
331  /// Set up outer loop inductions by checking Phis in outer loop header for
332  /// supported inductions (int inductions). Return false if any of these Phis
333  /// is not a supported induction or if we fail to find an induction.
334  bool setupOuterLoopInductions();
335 
336  /// Return true if the pre-header, exiting and latch blocks of \p Lp
337  /// (non-recursive) are considered legal for vectorization.
338  /// Temporarily taking UseVPlanNativePath parameter. If true, take
339  /// the new code path being implemented for outer loop vectorization
340  /// (should be functional for inner loop vectorization) based on VPlan.
341  /// If false, good old LV code.
342  bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
343 
344  /// Check if a single basic block loop is vectorizable.
345  /// At this point we know that this is a loop with a constant trip count
346  /// and we only need to check individual instructions.
347  bool canVectorizeInstrs();
348 
349  /// When we vectorize loops we may change the order in which
350  /// we read and write from memory. This method checks if it is
351  /// legal to vectorize the code, considering only memory constrains.
352  /// Returns true if the loop is vectorizable
353  bool canVectorizeMemory();
354 
355  /// Return true if we can vectorize this loop using the IF-conversion
356  /// transformation.
357  bool canVectorizeWithIfConvert();
358 
359  /// Return true if we can vectorize this outer loop. The method performs
360  /// specific checks for outer loop vectorization.
361  bool canVectorizeOuterLoop();
362 
363  /// Return true if all of the instructions in the block can be speculatively
364  /// executed. \p SafePtrs is a list of addresses that are known to be legal
365  /// and we know that we can read from them without segfault.
366  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
367 
368  /// Updates the vectorization state by adding \p Phi to the inductions list.
369  /// This can set \p Phi as the main induction of the loop if \p Phi is a
370  /// better choice for the main induction than the existing one.
371  void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
372  SmallPtrSetImpl<Value *> &AllowedExit);
373 
374  /// Create an analysis remark that explains why vectorization failed
375  ///
376  /// \p RemarkName is the identifier for the remark. If \p I is passed it is
377  /// an instruction that prevents vectorization. Otherwise the loop is used
378  /// for the location of the remark. \return the remark object that can be
379  /// streamed to.
381  createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
382  return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
383  RemarkName, TheLoop, I);
384  }
385 
386  /// If an access has a symbolic strides, this maps the pointer value to
387  /// the stride symbol.
388  const ValueToValueMap *getSymbolicStrides() {
389  // FIXME: Currently, the set of symbolic strides is sometimes queried before
390  // it's collected. This happens from canVectorizeWithIfConvert, when the
391  // pointer is checked to reference consecutive elements suitable for a
392  // masked access.
393  return LAI ? &LAI->getSymbolicStrides() : nullptr;
394  }
395 
396  /// The loop that we evaluate.
397  Loop *TheLoop;
398 
399  /// Loop Info analysis.
400  LoopInfo *LI;
401 
402  /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
403  /// Applies dynamic knowledge to simplify SCEV expressions in the context
404  /// of existing SCEV assumptions. The analysis will also add a minimal set
405  /// of new predicates if this is required to enable vectorization and
406  /// unrolling.
408 
409  /// Target Library Info.
410  TargetLibraryInfo *TLI;
411 
412  /// Dominator Tree.
413  DominatorTree *DT;
414 
415  // LoopAccess analysis.
416  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
417 
418  // And the loop-accesses info corresponding to this loop. This pointer is
419  // null until canVectorizeMemory sets it up.
420  const LoopAccessInfo *LAI = nullptr;
421 
422  /// Interface to emit optimization remarks.
424 
425  // --- vectorization state --- //
426 
427  /// Holds the primary induction variable. This is the counter of the
428  /// loop.
429  PHINode *PrimaryInduction = nullptr;
430 
431  /// Holds the reduction variables.
432  ReductionList Reductions;
433 
434  /// Holds all of the induction variables that we found in the loop.
435  /// Notice that inductions don't need to start at zero and that induction
436  /// variables can be pointers.
437  InductionList Inductions;
438 
439  /// Holds all the casts that participate in the update chain of the induction
440  /// variables, and that have been proven to be redundant (possibly under a
441  /// runtime guard). These casts can be ignored when creating the vectorized
442  /// loop body.
443  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
444 
445  /// Holds the phi nodes that are first-order recurrences.
446  RecurrenceSet FirstOrderRecurrences;
447 
448  /// Holds instructions that need to sink past other instructions to handle
449  /// first-order recurrences.
451 
452  /// Holds the widest induction type encountered.
453  Type *WidestIndTy = nullptr;
454 
455  /// Allowed outside users. This holds the induction and reduction
456  /// vars which can be accessed from outside the loop.
457  SmallPtrSet<Value *, 4> AllowedExit;
458 
459  /// Can we assume the absence of NaNs.
460  bool HasFunNoNaNAttr = false;
461 
462  /// Vectorization requirements that will go through late-evaluation.
463  LoopVectorizationRequirements *Requirements;
464 
465  /// Used to emit an analysis of any legality issues.
466  LoopVectorizeHints *Hints;
467 
468  /// The demanded bits analysis is used to compute the minimum type size in
469  /// which a reduction can be computed.
470  DemandedBits *DB;
471 
472  /// The assumption cache analysis is used to compute the minimum type size in
473  /// which a reduction can be computed.
474  AssumptionCache *AC;
475 
476  /// While vectorizing these instructions we have to generate a
477  /// call to the appropriate masked intrinsic
479 };
480 
481 } // namespace llvm
482 
483 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
Type * getWidestInductionType()
Returns the widest induction type.
bool isMaskRequired(const Instruction *I)
Returns true if vector representation of the instruction I requires mask.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
A cache of @llvm.assume calls within a function.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
F(f)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
Diagnostic information for optimization analysis remarks.
ReductionList * getReductionVars()
Returns the reduction variables found in the loop.
PHINode * getPrimaryInduction()
Returns the primary induction variable.
LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, std::function< const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
#define H(x, y, z)
Definition: MD5.cpp:57
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
Create an analysis remark that explains why vectorization failed.
bool isReductionVariable(PHINode *PN)
Returns True if PN is a reduction variable in this loop.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
const RuntimePointerChecking * getRuntimePointerChecking() const
Returns the information that we collected about runtime memory check.
A struct for saving information about induction variables.
This holds vectorization requirements that must be verified late in the process.
Provides information about what library functions are available for the current target.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Drive the analysis of memory accesses in the loop.
const LoopAccessInfo * getLAI() const
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:464
InductionList * getInductionVars()
Returns the induction variables found in the loop.
#define I(x, y, z)
Definition: MD5.cpp:58
RecurrenceSet * getFirstOrderRecurrences()
Return the first-order recurrences found in the loop.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LLVM Value Representation.
Definition: Value.h:72
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:331
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Root of the metadata hierarchy.
Definition: Metadata.h:57
The optimization diagnostic interface.
DenseMap< Instruction *, Instruction * > & getSinkAfter()
Return the set of instructions to sink to handle first-order recurrences.
void emitRemarkWithHints() const
Dumps all the hint information.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)