LLVM 19.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
30
31namespace llvm {
32
33class LoopInfo;
34class DominatorTree;
35class LoopVectorizationLegality;
36class LoopVectorizationCostModel;
37class PredicatedScalarEvolution;
38class LoopVectorizeHints;
39class OptimizationRemarkEmitter;
40class TargetTransformInfo;
41class TargetLibraryInfo;
42class VPRecipeBuilder;
43
44/// VPlan-based builder utility analogous to IRBuilder.
45class VPBuilder {
46 VPBasicBlock *BB = nullptr;
48
49 /// Insert \p VPI in BB at InsertPt if BB is set.
50 VPInstruction *tryInsertInstruction(VPInstruction *VPI) {
51 if (BB)
52 BB->insert(VPI, InsertPt);
53 return VPI;
54 }
55
56 VPInstruction *createInstruction(unsigned Opcode,
58 const Twine &Name = "") {
59 return tryInsertInstruction(new VPInstruction(Opcode, Operands, DL, Name));
60 }
61
62 VPInstruction *createInstruction(unsigned Opcode,
63 std::initializer_list<VPValue *> Operands,
64 DebugLoc DL, const Twine &Name = "") {
65 return createInstruction(Opcode, ArrayRef<VPValue *>(Operands), DL, Name);
66 }
67
68public:
69 VPBuilder() = default;
70 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
71
72 /// Clear the insertion point: created instructions will not be inserted into
73 /// a block.
75 BB = nullptr;
76 InsertPt = VPBasicBlock::iterator();
77 }
78
79 VPBasicBlock *getInsertBlock() const { return BB; }
80 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
81
82 /// InsertPoint - A saved insertion point.
84 VPBasicBlock *Block = nullptr;
86
87 public:
88 /// Creates a new insertion point which doesn't point to anything.
89 VPInsertPoint() = default;
90
91 /// Creates a new insertion point at the given location.
93 : Block(InsertBlock), Point(InsertPoint) {}
94
95 /// Returns true if this insert point is set.
96 bool isSet() const { return Block != nullptr; }
97
98 VPBasicBlock *getBlock() const { return Block; }
99 VPBasicBlock::iterator getPoint() const { return Point; }
100 };
101
102 /// Sets the current insert point to a previously-saved location.
104 if (IP.isSet())
105 setInsertPoint(IP.getBlock(), IP.getPoint());
106 else
108 }
109
110 /// This specifies that created VPInstructions should be appended to the end
111 /// of the specified block.
113 assert(TheBB && "Attempting to set a null insert point");
114 BB = TheBB;
115 InsertPt = BB->end();
116 }
117
118 /// This specifies that created instructions should be inserted at the
119 /// specified point.
121 BB = TheBB;
122 InsertPt = IP;
123 }
124
125 /// This specifies that created instructions should be inserted at the
126 /// specified point.
128 BB = IP->getParent();
129 InsertPt = IP->getIterator();
130 }
131
132 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
133 /// its underlying Instruction.
135 Instruction *Inst = nullptr, const Twine &Name = "") {
136 DebugLoc DL;
137 if (Inst)
138 DL = Inst->getDebugLoc();
139 VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
140 NewVPInst->setUnderlyingValue(Inst);
141 return NewVPInst;
142 }
144 DebugLoc DL, const Twine &Name = "") {
145 return createInstruction(Opcode, Operands, DL, Name);
146 }
147
149 std::initializer_list<VPValue *> Operands,
151 DebugLoc DL = {}, const Twine &Name = "") {
152 return tryInsertInstruction(
153 new VPInstruction(Opcode, Operands, WrapFlags, DL, Name));
154 }
156 const Twine &Name = "") {
157 return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
158 }
159
161 const Twine &Name = "") {
162 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
163 }
164
166 const Twine &Name = "") {
167 return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS}, DL, Name);
168 }
169
171 DebugLoc DL = {}, const Twine &Name = "",
172 std::optional<FastMathFlags> FMFs = std::nullopt) {
173 auto *Select =
174 FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
175 *FMFs, DL, Name)
176 : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
177 DL, Name);
178 return tryInsertInstruction(Select);
179 }
180
181 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
182 /// and \p B.
183 /// TODO: add createFCmp when needed.
184 VPValue *createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B,
185 DebugLoc DL = {}, const Twine &Name = "");
186
187 //===--------------------------------------------------------------------===//
188 // RAII helpers.
189 //===--------------------------------------------------------------------===//
190
191 /// RAII object that stores the current insertion point and restores it when
192 /// the object is destroyed.
194 VPBuilder &Builder;
195 VPBasicBlock *Block;
197
198 public:
200 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
201
204
205 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
206 };
207};
208
209/// TODO: The following VectorizationFactor was pulled out of
210/// LoopVectorizationCostModel class. LV also deals with
211/// VectorizerParams::VectorizationFactor and VectorizationCostTy.
212/// We need to streamline them.
213
214/// Information about vectorization costs.
216 /// Vector width with best cost.
218
219 /// Cost of the loop with that width.
221
222 /// Cost of the scalar loop.
224
225 /// The minimum trip count required to make vectorization profitable, e.g. due
226 /// to runtime checks.
228
232
233 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
235 return {ElementCount::getFixed(1), 0, 0};
236 }
237
238 bool operator==(const VectorizationFactor &rhs) const {
239 return Width == rhs.Width && Cost == rhs.Cost;
240 }
241
242 bool operator!=(const VectorizationFactor &rhs) const {
243 return !(*this == rhs);
244 }
245};
246
247/// ElementCountComparator creates a total ordering for ElementCount
248/// for the purposes of using it in a set structure.
250 bool operator()(const ElementCount &LHS, const ElementCount &RHS) const {
251 return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
252 std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
253 }
254};
256
257/// A class that represents two vectorization factors (initialized with 0 by
258/// default). One for fixed-width vectorization and one for scalable
259/// vectorization. This can be used by the vectorizer to choose from a range of
260/// fixed and/or scalable VFs in order to find the most cost-effective VF to
261/// vectorize with.
265
267 : FixedVF(ElementCount::getFixed(0)),
268 ScalableVF(ElementCount::getScalable(0)) {}
270 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
271 }
276 "Invalid scalable properties");
277 }
278
280
281 /// \return true if either fixed- or scalable VF is non-zero.
282 explicit operator bool() const { return FixedVF || ScalableVF; }
283
284 /// \return true if either fixed- or scalable VF is a valid vector VF.
285 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
286};
287
288/// Planner drives the vectorization process after having passed
289/// Legality checks.
291 /// The loop that we evaluate.
292 Loop *OrigLoop;
293
294 /// Loop Info analysis.
295 LoopInfo *LI;
296
297 /// The dominator tree.
298 DominatorTree *DT;
299
300 /// Target Library Info.
301 const TargetLibraryInfo *TLI;
302
303 /// Target Transform Info.
305
306 /// The legality analysis.
308
309 /// The profitability analysis.
311
312 /// The interleaved access analysis.
314
316
317 const LoopVectorizeHints &Hints;
318
320
322
323 /// Profitable vector factors.
325
326 /// A builder used to construct the current plan.
327 VPBuilder Builder;
328
329public:
331 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
336 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
337 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
338
339 /// Plan how to best vectorize, return the best VF and its cost, or
340 /// std::nullopt if vectorization and interleaving should be avoided up front.
341 std::optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
342
343 /// Use the VPlan-native path to plan how to best vectorize, return the best
344 /// VF and its cost.
346
347 /// Return the best VPlan for \p VF.
349
350 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
351 /// according to the best selected \p VF and \p UF.
352 ///
353 /// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
354 /// vectorization re-using plans for both the main and epilogue vector loops.
355 /// It should be removed once the re-use issue has been fixed.
356 /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
357 /// to re-use expansion results generated during main plan execution.
358 ///
359 /// Returns a mapping of SCEVs to their expanded IR values and a mapping for
360 /// the reduction resume values. Note that this is a temporary workaround
361 /// needed due to the current epilogue handling.
362 std::pair<DenseMap<const SCEV *, Value *>,
364 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
366 bool IsEpilogueVectorization,
367 const DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
368
369#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
370 void printPlans(raw_ostream &O);
371#endif
372
373 /// Look through the existing plans and return true if we have one with
374 /// vectorization factor \p VF.
376 return any_of(VPlans,
377 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
378 }
379
380 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
381 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
382 /// returned value holds for the entire \p Range.
383 static bool
384 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
385 VFRange &Range);
386
387 /// \return The most profitable vectorization factor and the cost of that VF
388 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
389 /// epilogue vectorization is not supported for the loop.
391 selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
392
393protected:
394 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
395 /// according to the information gathered by Legal when it checked if it is
396 /// legal to vectorize the loop.
397 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
398
399private:
400 /// Build a VPlan according to the information gathered by Legal. \return a
401 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
402 /// exclusive, possibly decreasing \p Range.End.
403 VPlanPtr buildVPlan(VFRange &Range);
404
405 /// Build a VPlan using VPRecipes according to the information gather by
406 /// Legal. This method is only used for the legacy inner loop vectorizer.
407 /// \p Range's largest included VF is restricted to the maximum VF the
408 /// returned VPlan is valid for. If no VPlan can be built for the input range,
409 /// set the largest included VF to the maximum VF for which no plan could be
410 /// built.
411 VPlanPtr tryToBuildVPlanWithVPRecipes(VFRange &Range);
412
413 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
414 /// according to the information gathered by Legal when it checked if it is
415 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
416 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
417
418 // Adjust the recipes for reductions. For in-loop reductions the chain of
419 // instructions leading from the loop exit instr to the phi need to be
420 // converted to reductions, with one operand being vector and the other being
421 // the scalar reduction chain. For other reductions, a select is introduced
422 // between the phi and live-out recipes when folding the tail.
423 void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
424 VPRecipeBuilder &RecipeBuilder,
425 ElementCount MinVF);
426
427 /// \return The most profitable vectorization factor and the cost of that VF.
428 /// This method checks every VF in \p CandidateVFs.
430 selectVectorizationFactor(const ElementCountSet &CandidateVFs);
431
432 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
433 /// that of B.
434 bool isMoreProfitable(const VectorizationFactor &A,
435 const VectorizationFactor &B) const;
436
437 /// Determines if we have the infrastructure to vectorize the loop and its
438 /// epilogue, assuming the main loop is vectorized by \p VF.
439 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
440};
441
442} // namespace llvm
443
444#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
std::string Name
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
mir Rename Register Operands
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:780
A debug info location.
Definition: DebugLoc.h:33
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:311
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:581
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Planner drives the vectorization process after having passed Legality checks.
std::optional< VectorizationFactor > plan(ElementCount UserVF, unsigned UserIC)
Plan how to best vectorize, return the best VF and its cost, or std::nullopt if vectorization and int...
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC)
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
std::pair< DenseMap< const SCEV *, Value * >, DenseMap< const RecurrenceDescriptor *, Value * > > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool IsEpilogueVectorization, const DenseMap< const SCEV *, Value * > *ExpandedSCEVs=nullptr)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
VPlan & getBestPlanFor(ElementCount VF) const
Return the best VPlan for VF.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
void printPlans(raw_ostream &O)
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2585
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:2606
iterator end()
Definition: VPlan.h:2616
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2644
RAII object that stores the current insertion point and restores it when the object is destroyed.
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPValue * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPBasicBlock::iterator getInsertPoint() const
VPBuilder(VPBasicBlock *InsertBB)
VPValue * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPValue * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL={}, const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPValue * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, std::initializer_list< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
VPValue * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL={}, const Twine &Name="")
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPValue * createNot(VPValue *Operand, DebugLoc DL={}, const Twine &Name="")
VPBuilder()=default
VPValue * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL={}, const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1137
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:711
VPBasicBlock * getParent()
Definition: VPlan.h:736
Helper class to create VPRecipies from IR instructions.
void setUnderlyingValue(Value *Val)
Definition: VPlanValue.h:77
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:2819
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
self_iterator getIterator()
Definition: ilist_node.h:109
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:134
ElementCountComparator creates a total ordering for ElementCount for the purposes of using it in a se...
bool operator()(const ElementCount &LHS, const ElementCount &RHS) const
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:87
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)