LLVM  9.0.0svn
LoopVectorizationLegality.h
Go to the documentation of this file.
1 //===- llvm/Transforms/Vectorize/LoopVectorizationLegality.h ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file defines the LoopVectorizationLegality class. Original code
11 /// in Loop Vectorizer has been moved out to its own file for modularity
12 /// and reusability.
13 ///
14 /// Currently, it works for innermost loop vectorization. Extending this to
15 /// outer loop vectorization is a TODO item.
16 ///
17 /// Also provides:
18 /// 1) LoopVectorizeHints class which keeps a number of loop annotations
19 /// locally for easy look up. It has the ability to write them back as
20 /// loop metadata, upon request.
21 /// 2) LoopVectorizationRequirements class for lazy bail out for the purpose
22 /// of reporting useful failure to vectorize message.
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
27 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
28 
29 #include "llvm/ADT/MapVector.h"
33 
34 namespace llvm {
35 
36 /// Create an analysis remark that explains why vectorization failed
37 ///
38 /// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
39 /// RemarkName is the identifier for the remark. If \p I is passed it is an
40 /// instruction that prevents vectorization. Otherwise \p TheLoop is used for
41 /// the location of the remark. \return the remark object that can be
42 /// streamed to.
43 OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
44  StringRef RemarkName,
45  Loop *TheLoop,
46  Instruction *I = nullptr);
47 
48 /// Utility class for getting and setting loop vectorizer hints in the form
49 /// of loop metadata.
50 /// This class keeps a number of loop annotations locally (as member variables)
51 /// and can, upon request, write them back as metadata on the loop. It will
52 /// initially scan the loop for existing metadata, and will update the local
53 /// values based on information in the loop.
54 /// We cannot write all values to metadata, as the mere presence of some info,
55 /// for example 'force', means a decision has been made. So, we need to be
56 /// careful NOT to add them if the user hasn't specifically asked so.
58  enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED };
59 
60  /// Hint - associates name and validation with the hint value.
61  struct Hint {
62  const char *Name;
63  unsigned Value; // This may have to change for non-numeric values.
64  HintKind Kind;
65 
66  Hint(const char *Name, unsigned Value, HintKind Kind)
67  : Name(Name), Value(Value), Kind(Kind) {}
68 
69  bool validate(unsigned Val);
70  };
71 
72  /// Vectorization width.
73  Hint Width;
74 
75  /// Vectorization interleave factor.
76  Hint Interleave;
77 
78  /// Vectorization forced
79  Hint Force;
80 
81  /// Already Vectorized
82  Hint IsVectorized;
83 
84  /// Return the loop metadata prefix.
85  static StringRef Prefix() { return "llvm.loop."; }
86 
87  /// True if there is any unsafe math in the loop.
88  bool PotentiallyUnsafe = false;
89 
90 public:
91  enum ForceKind {
92  FK_Undefined = -1, ///< Not selected.
93  FK_Disabled = 0, ///< Forcing disabled.
94  FK_Enabled = 1, ///< Forcing enabled.
95  };
96 
97  LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced,
99 
100  /// Mark the loop L as already vectorized by setting the width to 1.
101  void setAlreadyVectorized();
102 
103  bool allowVectorization(Function *F, Loop *L,
104  bool VectorizeOnlyWhenForced) const;
105 
106  /// Dumps all the hint information.
107  void emitRemarkWithHints() const;
108 
109  unsigned getWidth() const { return Width.Value; }
110  unsigned getInterleave() const { return Interleave.Value; }
111  unsigned getIsVectorized() const { return IsVectorized.Value; }
112  enum ForceKind getForce() const {
113  if ((ForceKind)Force.Value == FK_Undefined &&
115  return FK_Disabled;
116  return (ForceKind)Force.Value;
117  }
118 
119  /// If hints are provided that force vectorization, use the AlwaysPrint
120  /// pass name to force the frontend to print the diagnostic.
121  const char *vectorizeAnalysisPassName() const;
122 
123  bool allowReordering() const {
124  // When enabling loop hints are provided we allow the vectorizer to change
125  // the order of operations that is given by the scalar loop. This is not
126  // enabled by default because can be unsafe or inefficient. For example,
127  // reordering floating-point operations will change the way round-off
128  // error accumulates in the loop.
129  return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
130  }
131 
132  bool isPotentiallyUnsafe() const {
133  // Avoid FP vectorization if the target is unsure about proper support.
134  // This may be related to the SIMD unit in the target not handling
135  // IEEE 754 FP ops properly, or bad single-to-double promotions.
136  // Otherwise, a sequence of vectorized loops, even without reduction,
137  // could lead to different end results on the destination vectors.
138  return getForce() != LoopVectorizeHints::FK_Enabled && PotentiallyUnsafe;
139  }
140 
141  void setPotentiallyUnsafe() { PotentiallyUnsafe = true; }
142 
143 private:
144  /// Find hints specified in the loop metadata and update local values.
145  void getHintsFromMetadata();
146 
147  /// Checks string hint with one operand and set value if valid.
148  void setHint(StringRef Name, Metadata *Arg);
149 
150  /// The loop these hints belong to.
151  const Loop *TheLoop;
152 
153  /// Interface to emit optimization remarks.
155 };
156 
157 /// This holds vectorization requirements that must be verified late in
158 /// the process. The requirements are set by legalize and costmodel. Once
159 /// vectorization has been determined to be possible and profitable the
160 /// requirements can be verified by looking for metadata or compiler options.
161 /// For example, some loops require FP commutativity which is only allowed if
162 /// vectorization is explicitly specified or if the fast-math compiler option
163 /// has been provided.
164 /// Late evaluation of these requirements allows helpful diagnostics to be
165 /// composed that tells the user what need to be done to vectorize the loop. For
166 /// example, by specifying #pragma clang loop vectorize or -ffast-math. Late
167 /// evaluation should be used only when diagnostics can generated that can be
168 /// followed by a non-expert user.
170 public:
172 
174  // First unsafe algebra instruction.
175  if (!UnsafeAlgebraInst)
176  UnsafeAlgebraInst = I;
177  }
178 
179  void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
180 
181  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
182 
183 private:
184  unsigned NumRuntimePointerChecks = 0;
185  Instruction *UnsafeAlgebraInst = nullptr;
186 
187  /// Interface to emit optimization remarks.
189 };
190 
191 /// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
192 /// to what vectorization factor.
193 /// This class does not look at the profitability of vectorization, only the
194 /// legality. This class has two main kinds of checks:
195 /// * Memory checks - The code in canVectorizeMemory checks if vectorization
196 /// will change the order of memory accesses in a way that will change the
197 /// correctness of the program.
198 /// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
199 /// checks for a number of different conditions, such as the availability of a
200 /// single induction variable, that all types are supported and vectorize-able,
201 /// etc. This code reflects the capabilities of InnerLoopVectorizer.
202 /// This class is also used by InnerLoopVectorizer for identifying
203 /// induction variable and the different reduction variables.
205 public:
209  Function *F, std::function<const LoopAccessInfo &(Loop &)> *GetLAA,
212  AssumptionCache *AC)
213  : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT),
214  GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {}
215 
216  /// ReductionList contains the reduction descriptors for all
217  /// of the reductions that were found in the loop.
219 
220  /// InductionList saves induction variables and maps them to the
221  /// induction descriptor.
223 
224  /// RecurrenceSet contains the phi nodes that are recurrences other than
225  /// inductions and reductions.
227 
228  /// Returns true if it is legal to vectorize this loop.
229  /// This does not mean that it is profitable to vectorize this
230  /// loop, only that it is legal to do so.
231  /// Temporarily taking UseVPlanNativePath parameter. If true, take
232  /// the new code path being implemented for outer loop vectorization
233  /// (should be functional for inner loop vectorization) based on VPlan.
234  /// If false, good old LV code.
235  bool canVectorize(bool UseVPlanNativePath);
236 
237  /// Return true if we can vectorize this loop while folding its tail by
238  /// masking.
239  bool canFoldTailByMasking();
240 
241  /// Returns the primary induction variable.
242  PHINode *getPrimaryInduction() { return PrimaryInduction; }
243 
244  /// Returns the reduction variables found in the loop.
245  ReductionList *getReductionVars() { return &Reductions; }
246 
247  /// Returns the induction variables found in the loop.
248  InductionList *getInductionVars() { return &Inductions; }
249 
250  /// Return the first-order recurrences found in the loop.
251  RecurrenceSet *getFirstOrderRecurrences() { return &FirstOrderRecurrences; }
252 
253  /// Return the set of instructions to sink to handle first-order recurrences.
255 
256  /// Returns the widest induction type.
257  Type *getWidestInductionType() { return WidestIndTy; }
258 
259  /// Returns True if V is a Phi node of an induction variable in this loop.
260  bool isInductionPhi(const Value *V);
261 
262  /// Returns True if V is a cast that is part of an induction def-use chain,
263  /// and had been proven to be redundant under a runtime guard (in other
264  /// words, the cast has the same SCEV expression as the induction phi).
265  bool isCastedInductionVariable(const Value *V);
266 
267  /// Returns True if V can be considered as an induction variable in this
268  /// loop. V can be the induction phi, or some redundant cast in the def-use
269  /// chain of the inducion phi.
270  bool isInductionVariable(const Value *V);
271 
272  /// Returns True if PN is a reduction variable in this loop.
273  bool isReductionVariable(PHINode *PN) { return Reductions.count(PN); }
274 
275  /// Returns True if Phi is a first-order recurrence in this loop.
276  bool isFirstOrderRecurrence(const PHINode *Phi);
277 
278  /// Return true if the block BB needs to be predicated in order for the loop
279  /// to be vectorized.
280  bool blockNeedsPredication(BasicBlock *BB);
281 
282  /// Check if this pointer is consecutive when vectorizing. This happens
283  /// when the last index of the GEP is the induction variable, or that the
284  /// pointer itself is an induction variable.
285  /// This check allows us to vectorize A[idx] into a wide load/store.
286  /// Returns:
287  /// 0 - Stride is unknown or non-consecutive.
288  /// 1 - Address is consecutive.
289  /// -1 - Address is consecutive, and decreasing.
290  /// NOTE: This method must only be used before modifying the original scalar
291  /// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
292  int isConsecutivePtr(Value *Ptr);
293 
294  /// Returns true if the value V is uniform within the loop.
295  bool isUniform(Value *V);
296 
297  /// Returns the information that we collected about runtime memory check.
299  return LAI->getRuntimePointerChecking();
300  }
301 
302  const LoopAccessInfo *getLAI() const { return LAI; }
303 
304  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
305 
306  uint64_t getMaxSafeRegisterWidth() const {
307  return LAI->getDepChecker().getMaxSafeRegisterWidth();
308  }
309 
310  bool hasStride(Value *V) { return LAI->hasStride(V); }
311 
312  /// Returns true if vector representation of the instruction \p I
313  /// requires mask.
314  bool isMaskRequired(const Instruction *I) { return (MaskedOp.count(I) != 0); }
315 
316  unsigned getNumStores() const { return LAI->getNumStores(); }
317  unsigned getNumLoads() const { return LAI->getNumLoads(); }
318 
319  // Returns true if the NoNaN attribute is set on the function.
320  bool hasFunNoNaNAttr() const { return HasFunNoNaNAttr; }
321 
322 private:
323  /// Return true if the pre-header, exiting and latch blocks of \p Lp and all
324  /// its nested loops are considered legal for vectorization. These legal
325  /// checks are common for inner and outer loop vectorization.
326  /// Temporarily taking UseVPlanNativePath parameter. If true, take
327  /// the new code path being implemented for outer loop vectorization
328  /// (should be functional for inner loop vectorization) based on VPlan.
329  /// If false, good old LV code.
330  bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath);
331 
332  /// Set up outer loop inductions by checking Phis in outer loop header for
333  /// supported inductions (int inductions). Return false if any of these Phis
334  /// is not a supported induction or if we fail to find an induction.
335  bool setupOuterLoopInductions();
336 
337  /// Return true if the pre-header, exiting and latch blocks of \p Lp
338  /// (non-recursive) are considered legal for vectorization.
339  /// Temporarily taking UseVPlanNativePath parameter. If true, take
340  /// the new code path being implemented for outer loop vectorization
341  /// (should be functional for inner loop vectorization) based on VPlan.
342  /// If false, good old LV code.
343  bool canVectorizeLoopCFG(Loop *Lp, bool UseVPlanNativePath);
344 
345  /// Check if a single basic block loop is vectorizable.
346  /// At this point we know that this is a loop with a constant trip count
347  /// and we only need to check individual instructions.
348  bool canVectorizeInstrs();
349 
350  /// When we vectorize loops we may change the order in which
351  /// we read and write from memory. This method checks if it is
352  /// legal to vectorize the code, considering only memory constrains.
353  /// Returns true if the loop is vectorizable
354  bool canVectorizeMemory();
355 
356  /// Return true if we can vectorize this loop using the IF-conversion
357  /// transformation.
358  bool canVectorizeWithIfConvert();
359 
360  /// Return true if we can vectorize this outer loop. The method performs
361  /// specific checks for outer loop vectorization.
362  bool canVectorizeOuterLoop();
363 
364  /// Return true if all of the instructions in the block can be speculatively
365  /// executed. \p SafePtrs is a list of addresses that are known to be legal
366  /// and we know that we can read from them without segfault.
367  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
368 
369  /// Updates the vectorization state by adding \p Phi to the inductions list.
370  /// This can set \p Phi as the main induction of the loop if \p Phi is a
371  /// better choice for the main induction than the existing one.
372  void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
373  SmallPtrSetImpl<Value *> &AllowedExit);
374 
375  /// If an access has a symbolic strides, this maps the pointer value to
376  /// the stride symbol.
377  const ValueToValueMap *getSymbolicStrides() {
378  // FIXME: Currently, the set of symbolic strides is sometimes queried before
379  // it's collected. This happens from canVectorizeWithIfConvert, when the
380  // pointer is checked to reference consecutive elements suitable for a
381  // masked access.
382  return LAI ? &LAI->getSymbolicStrides() : nullptr;
383  }
384 
385  /// Reports a vectorization illegality: print \p DebugMsg for debugging
386  /// purposes along with the corresponding optimization remark \p RemarkName.
387  /// If \p I is passed it is an instruction that prevents vectorization.
388  /// Otherwise the loop is used for the location of the remark.
389  void reportVectorizationFailure(const StringRef DebugMsg,
390  const StringRef OREMsg, const StringRef ORETag,
391  Instruction *I = nullptr) const;
392 
393  /// The loop that we evaluate.
394  Loop *TheLoop;
395 
396  /// Loop Info analysis.
397  LoopInfo *LI;
398 
399  /// A wrapper around ScalarEvolution used to add runtime SCEV checks.
400  /// Applies dynamic knowledge to simplify SCEV expressions in the context
401  /// of existing SCEV assumptions. The analysis will also add a minimal set
402  /// of new predicates if this is required to enable vectorization and
403  /// unrolling.
405 
406  /// Target Transform Info.
407  TargetTransformInfo *TTI;
408 
409  /// Target Library Info.
410  TargetLibraryInfo *TLI;
411 
412  /// Dominator Tree.
413  DominatorTree *DT;
414 
415  // LoopAccess analysis.
416  std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
417 
418  // And the loop-accesses info corresponding to this loop. This pointer is
419  // null until canVectorizeMemory sets it up.
420  const LoopAccessInfo *LAI = nullptr;
421 
422  /// Interface to emit optimization remarks.
424 
425  // --- vectorization state --- //
426 
427  /// Holds the primary induction variable. This is the counter of the
428  /// loop.
429  PHINode *PrimaryInduction = nullptr;
430 
431  /// Holds the reduction variables.
432  ReductionList Reductions;
433 
434  /// Holds all of the induction variables that we found in the loop.
435  /// Notice that inductions don't need to start at zero and that induction
436  /// variables can be pointers.
437  InductionList Inductions;
438 
439  /// Holds all the casts that participate in the update chain of the induction
440  /// variables, and that have been proven to be redundant (possibly under a
441  /// runtime guard). These casts can be ignored when creating the vectorized
442  /// loop body.
443  SmallPtrSet<Instruction *, 4> InductionCastsToIgnore;
444 
445  /// Holds the phi nodes that are first-order recurrences.
446  RecurrenceSet FirstOrderRecurrences;
447 
448  /// Holds instructions that need to sink past other instructions to handle
449  /// first-order recurrences.
451 
452  /// Holds the widest induction type encountered.
453  Type *WidestIndTy = nullptr;
454 
455  /// Allowed outside users. This holds the induction and reduction
456  /// vars which can be accessed from outside the loop.
457  SmallPtrSet<Value *, 4> AllowedExit;
458 
459  /// Can we assume the absence of NaNs.
460  bool HasFunNoNaNAttr = false;
461 
462  /// Vectorization requirements that will go through late-evaluation.
463  LoopVectorizationRequirements *Requirements;
464 
465  /// Used to emit an analysis of any legality issues.
466  LoopVectorizeHints *Hints;
467 
468  /// The demanded bits analysis is used to compute the minimum type size in
469  /// which a reduction can be computed.
470  DemandedBits *DB;
471 
472  /// The assumption cache analysis is used to compute the minimum type size in
473  /// which a reduction can be computed.
474  AssumptionCache *AC;
475 
476  /// While vectorizing these instructions we have to generate a
477  /// call to the appropriate masked intrinsic
479 };
480 
481 } // namespace llvm
482 
483 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONLEGALITY_H
Type * getWidestInductionType()
Returns the widest induction type.
bool isMaskRequired(const Instruction *I)
Returns true if vector representation of the instruction I requires mask.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F, std::function< const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
A cache of @llvm.assume calls within a function.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
F(f)
const char * vectorizeAnalysisPassName() const
If hints are provided that force vectorization, use the AlwaysPrint pass name to force the frontend t...
ReductionList * getReductionVars()
Returns the reduction variables found in the loop.
PHINode * getPrimaryInduction()
Returns the primary induction variable.
LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
bool allowVectorization(Function *F, Loop *L, bool VectorizeOnlyWhenForced) const
#define H(x, y, z)
Definition: MD5.cpp:57
OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, Instruction *I=nullptr)
Create an analysis remark that explains why vectorization failed.
bool isReductionVariable(PHINode *PN)
Returns True if PN is a reduction variable in this loop.
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
const RuntimePointerChecking * getRuntimePointerChecking() const
Returns the information that we collected about runtime memory check.
A struct for saving information about induction variables.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This holds vectorization requirements that must be verified late in the process.
Provides information about what library functions are available for the current target.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Drive the analysis of memory accesses in the loop.
const LoopAccessInfo * getLAI() const
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
LoopVectorizeHints(const Loop *L, bool InterleaveOnlyWhenForced, OptimizationRemarkEmitter &ORE)
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:467
InductionList * getInductionVars()
Returns the induction variables found in the loop.
#define I(x, y, z)
Definition: MD5.cpp:58
RecurrenceSet * getFirstOrderRecurrences()
Return the first-order recurrences found in the loop.
void setAlreadyVectorized()
Mark the loop L as already vectorized by setting the width to 1.
LLVM Value Representation.
Definition: Value.h:72
print Print MemDeps of function
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
bool hasDisableAllTransformsHint(const Loop *L)
Look for the loop attribute that disables all transformation heuristic.
Definition: LoopUtils.cpp:331
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Root of the metadata hierarchy.
Definition: Metadata.h:57
The optimization diagnostic interface.
DenseMap< Instruction *, Instruction * > & getSinkAfter()
Return the set of instructions to sink to handle first-order recurrences.
void emitRemarkWithHints() const
Dumps all the hint information.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)