LLVM 22.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, they represent
16/// the control-flow of the vectorized version, the replication of
17/// instructions that are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
43class LoopVersioning;
47class VPRecipeBuilder;
48struct VFRange;
49
52
53/// VPlan-based builder utility analogous to IRBuilder.
54class VPBuilder {
55 VPBasicBlock *BB = nullptr;
57
58 /// Insert \p R in BB at InsertPt if BB is set, and return \p R either way.
59 template <typename T> T *tryInsertInstruction(T *R) {
60 if (BB)
61 BB->insert(R, InsertPt);
62 return R;
63 }
64
65 VPInstruction *createInstruction(unsigned Opcode,
67 const Twine &Name = "") {
68 return tryInsertInstruction(new VPInstruction(Opcode, Operands, DL, Name));
69 }
70
71public:
72 VPBuilder() = default;
73 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
74 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
78
79 /// Clear the insertion point: created instructions will not be inserted into
80 /// a block.
82 BB = nullptr;
83 InsertPt = VPBasicBlock::iterator();
84 }
85
/// \return the block new recipes are inserted into, or nullptr if none is set.
86 VPBasicBlock *getInsertBlock() const { return BB; }
/// \return the iterator handed to VPBasicBlock::insert for newly created
/// recipes.
87 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
88
89 /// Create a VPBuilder to insert after \p R.
92 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
93 return B;
94 }
95
96 /// VPInsertPoint - A saved insertion point.
98 VPBasicBlock *Block = nullptr;
100
101 public:
102 /// Creates a new insertion point which doesn't point to anything.
103 VPInsertPoint() = default;
104
105 /// Creates a new insertion point at the given location.
107 : Block(InsertBlock), Point(InsertPoint) {}
108
109 /// Returns true if this insert point is set.
110 bool isSet() const { return Block != nullptr; }
111
112 VPBasicBlock *getBlock() const { return Block; }
113 VPBasicBlock::iterator getPoint() const { return Point; }
114 };
115
116 /// Sets the current insert point to a previously-saved location.
118 if (IP.isSet())
119 setInsertPoint(IP.getBlock(), IP.getPoint());
120 else
122 }
123
124 /// This specifies that created VPInstructions should be appended to the end
125 /// of the specified block.
127 assert(TheBB && "Attempting to set a null insert point");
128 BB = TheBB;
129 InsertPt = BB->end();
130 }
131
132 /// This specifies that created instructions should be inserted at the
133 /// specified point.
135 BB = TheBB;
136 InsertPt = IP;
137 }
138
139 /// This specifies that created instructions should be inserted at the
140 /// specified point.
142 BB = IP->getParent();
143 InsertPt = IP->getIterator();
144 }
145
146 /// Insert \p R at the current insertion point. BB must be set (not checked).
147 void insert(VPRecipeBase *R) { BB->insert(R, InsertPt); }
148
149 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
150 /// its underlying Instruction.
152 Instruction *Inst = nullptr,
153 const Twine &Name = "") {
155 if (Inst)
156 DL = Inst->getDebugLoc();
157 VPInstruction *NewVPInst = createInstruction(Opcode, Operands, DL, Name);
158 NewVPInst->setUnderlyingValue(Inst);
159 return NewVPInst;
160 }
162 DebugLoc DL, const Twine &Name = "") {
163 return createInstruction(Opcode, Operands, DL, Name);
164 }
166 const VPIRFlags &Flags,
168 const Twine &Name = "") {
169 return tryInsertInstruction(
170 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
171 }
172
174 Type *ResultTy, const VPIRFlags &Flags = {},
176 const Twine &Name = "") {
177 return tryInsertInstruction(
178 new VPInstructionWithType(Opcode, Operands, ResultTy, Flags, DL, Name));
179 }
180
182 ArrayRef<VPValue *> Operands,
185 const Twine &Name = "") {
186 return tryInsertInstruction(
187 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
188 }
189
192 const Twine &Name = "") {
193 return createInstruction(VPInstruction::Not, {Operand}, DL, Name);
194 }
195
198 const Twine &Name = "") {
199 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, DL, Name);
200 }
201
204 const Twine &Name = "") {
205
206 return tryInsertInstruction(new VPInstruction(
207 Instruction::BinaryOps::Or, {LHS, RHS},
208 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
209 }
210
213 const Twine &Name = "") {
214 return tryInsertInstruction(
216 }
217
219 createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
220 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
221 std::optional<FastMathFlags> FMFs = std::nullopt) {
222 auto *Select =
223 FMFs ? new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
224 *FMFs, {}, DL, Name)
225 : new VPInstruction(Instruction::Select, {Cond, TrueVal, FalseVal},
226 DL, Name);
227 return tryInsertInstruction(Select);
228 }
229
230 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
231 /// and \p B.
234 const Twine &Name = "") {
236 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
237 return tryInsertInstruction(
238 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
239 }
240
241 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
242 /// and \p B.
245 const Twine &Name = "") {
247 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
248 return tryInsertInstruction(
249 new VPInstruction(Instruction::FCmp, {A, B}, Pred, {}, DL, Name));
250 }
251
254 const Twine &Name = "") {
255 return tryInsertInstruction(
257 GEPNoWrapFlags::none(), {}, DL, Name));
258 }
259
261 GEPNoWrapFlags GEPFlags,
263 const Twine &Name = "") {
264 return tryInsertInstruction(new VPInstruction(
265 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
266 }
267
270 const Twine &Name = "") {
271 return tryInsertInstruction(
273 GEPNoWrapFlags::none(), {}, DL, Name));
274 }
275
277 const Twine &Name = "") {
278 return tryInsertInstruction(new VPPhi(IncomingValues, DL, Name));
279 }
280
282 VPlan &Plan = *getInsertBlock()->getPlan();
283 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
284 if (EC.isScalable()) {
285 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
286 RuntimeEC = EC.getKnownMinValue() == 1
287 ? VScale
288 : createOverflowingOp(Instruction::Mul,
289 {VScale, RuntimeEC}, {true, false});
290 }
291 return RuntimeEC;
292 }
293
294 /// Convert the input value \p Current to the corresponding value of an
295 /// induction with \p Start and \p Step values, using \p Start + \p Current *
296 /// \p Step.
298 FPMathOperator *FPBinOp, VPValue *Start,
299 VPValue *Current, VPValue *Step,
300 const Twine &Name = "") {
301 return tryInsertInstruction(
302 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
303 }
304
306 Type *ResultTy, DebugLoc DL,
307 const VPIRFlags &Flags = {},
308 const VPIRMetadata &Metadata = {}) {
309 return tryInsertInstruction(
310 new VPInstructionWithType(Opcode, Op, ResultTy, DL, Flags, Metadata));
311 }
312
314 DebugLoc DL) {
315 if (ResultTy == SrcTy)
316 return Op;
317 Instruction::CastOps CastOp =
318 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
319 ? Instruction::Trunc
320 : Instruction::ZExt;
321 return createScalarCast(CastOp, Op, ResultTy, DL);
322 }
323
325 Type *ResultTy) {
326 VPIRFlags Flags;
327 if (Opcode == Instruction::Trunc)
328 Flags = VPIRFlags::TruncFlagsTy(false, false);
329 else if (Opcode == Instruction::ZExt)
330 Flags = VPIRFlags::NonNegFlagsTy(false);
331 return tryInsertInstruction(
332 new VPWidenCastRecipe(Opcode, Op, ResultTy, Flags));
333 }
334
337 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
338 VPValue *VF, DebugLoc DL) {
339 return tryInsertInstruction(new VPScalarIVStepsRecipe(
340 IV, Step, VF, InductionOpcode,
341 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
342 }
343
345 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
346 }
347
348 //===--------------------------------------------------------------------===//
349 // RAII helpers.
350 //===--------------------------------------------------------------------===//
351
352 /// RAII object that stores the current insertion point and restores it when
353 /// the object is destroyed.
355 VPBuilder &Builder;
356 VPBasicBlock *Block;
358
359 public:
361 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
362
365
366 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
367 };
368};
369
370/// TODO: The following VectorizationFactor was pulled out of
371/// LoopVectorizationCostModel class. LV also deals with
372/// VectorizerParams::VectorizationFactor.
373/// We need to streamline them.
374
375/// Information about vectorization costs.
377 /// Vector width with best cost.
379
380 /// Cost of the loop with that width.
382
383 /// Cost of the scalar loop.
385
386 /// The minimum trip count required to make vectorization profitable, e.g. due
387 /// to runtime checks.
389
393
394 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
396 return {ElementCount::getFixed(1), 0, 0};
397 }
398
/// Equality considers only Width and Cost; no other member is compared.
399 bool operator==(const VectorizationFactor &rhs) const {
400 return Width == rhs.Width && Cost == rhs.Cost;
401 }
402
/// Exact negation of operator== on the same two operands.
403 bool operator!=(const VectorizationFactor &rhs) const {
404 return !(*this == rhs);
405 }
406};
407
408/// A class that represents two vectorization factors (initialized with 0 by
409/// default). One for fixed-width vectorization and one for scalable
410/// vectorization. This can be used by the vectorizer to choose from a range of
411/// fixed and/or scalable VFs in order to find the most cost-effective VF to
412/// vectorize with.
416
418 : FixedVF(ElementCount::getFixed(0)),
419 ScalableVF(ElementCount::getScalable(0)) {}
421 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
422 }
426 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
427 "Invalid scalable properties");
428 }
429
431
432 /// \return true if either fixed- or scalable VF is non-zero.
433 explicit operator bool() const { return FixedVF || ScalableVF; }
434
435 /// \return true if either fixed- or scalable VF is a valid vector VF.
436 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
437};
438
439/// Planner drives the vectorization process after having passed
440/// Legality checks.
442 /// The loop that we evaluate.
443 Loop *OrigLoop;
444
445 /// Loop Info analysis.
446 LoopInfo *LI;
447
448 /// The dominator tree.
449 DominatorTree *DT;
450
451 /// Target Library Info.
452 const TargetLibraryInfo *TLI;
453
454 /// Target Transform Info.
455 const TargetTransformInfo &TTI;
456
457 /// The legality analysis.
459
460 /// The profitability analysis.
462
463 /// The interleaved access analysis.
465
467
468 const LoopVectorizeHints &Hints;
469
471
473
474 /// Profitable vector factors.
476
477 /// A builder used to construct the current plan.
478 VPBuilder Builder;
479
480 /// Computes the cost of \p Plan for vectorization factor \p VF.
481 ///
482 /// The current implementation requires access to the
483 /// LoopVectorizationLegality to handle inductions and reductions, which is
484 /// why it is kept separate from the VPlan-only cost infrastructure.
485 ///
486 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
487 /// been retired.
488 InstructionCost cost(VPlan &Plan, ElementCount VF) const;
489
490 /// Precompute costs for certain instructions using the legacy cost model. The
491 /// function is used to bring up the VPlan-based cost model to initially avoid
492 /// taking different decisions due to inaccuracies in the legacy cost model.
493 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
494 VPCostContext &CostCtx) const;
495
496public:
498 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
503 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
504 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
505
506 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
507 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
508 /// interleaving should be avoided up-front, no plans are generated.
509 void plan(ElementCount UserVF, unsigned UserIC);
510
511 /// Use the VPlan-native path to plan how to best vectorize, return the best
512 /// VF and its cost.
514
515 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
516 /// for each VF.
517 VPlan &getPlanFor(ElementCount VF) const;
518
519 /// Compute and return the most profitable vectorization factor. Also collect
520 /// all profitable VFs in ProfitableVFs.
522
523 /// \return The desired interleave count.
524 /// If interleave count has been specified by metadata it will be returned.
525 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
526 /// are the selected vectorization factor and the cost of the selected VF.
527 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
528 InstructionCost LoopCost);
529
530 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
531 /// according to the best selected \p VF and \p UF.
532 ///
533 /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
534 /// epilogue vector loop. It should be removed once the re-use issue has been
535 /// fixed.
536 ///
537 /// Returns a mapping of SCEVs to their expanded IR values.
538 /// Note that this is a temporary workaround needed due to the current
539 /// epilogue handling.
541 VPlan &BestPlan,
543 DominatorTree *DT,
544 bool VectorizingEpilogue);
545
546#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
547 void printPlans(raw_ostream &O);
548#endif
549
550 /// Look through the existing plans and return true if we have one with
551 /// vectorization factor \p VF.
553 return any_of(VPlans,
554 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
555 }
556
557 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
558 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
559 /// returned value holds for the entire \p Range.
560 static bool
561 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
562 VFRange &Range);
563
564 /// \return The most profitable vectorization factor and the cost of that VF
565 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
566 /// epilogue vectorization is not supported for the loop.
568 selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
569
570 /// Emit remarks for recipes with invalid costs in the available VPlans.
572
573 /// Add a check to \p Plan to see if the vector loop should be executed
574 /// based on its trip count.
575 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
576 ElementCount MinProfitableTripCount) const;
577
578 /// Update loop metadata and profile info for both the scalar remainder loop
579 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
580 /// loop on the vector loop and replaces vectorizer-specific metadata. The
581 /// loop ID of the original loop \p OrigLoopID must be passed, together with
582 /// the average trip count and invocation weight of the original loop (\p
583 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
584 /// cannot be retrieved after the plan has been executed, as the original loop
585 /// may have been removed.
587 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
588 bool VectorizingEpilogue, MDNode *OrigLoopID,
589 std::optional<unsigned> OrigAverageTripCount,
590 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
591 bool DisableRuntimeUnroll);
592
593protected:
594 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
595 /// according to the information gathered by Legal when it checked if it is
596 /// legal to vectorize the loop.
597 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
598
599private:
600 /// Build a VPlan according to the information gathered by Legal. \return a
601 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
602 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
603 /// the input range, set the largest included VF to the maximum VF for which
604 /// no plan could be built.
605 VPlanPtr tryToBuildVPlan(VFRange &Range);
606
607 /// Build a VPlan using VPRecipes according to the information gathered by
608 /// Legal. This method is only used for the legacy inner loop vectorizer.
609 /// \p Range's largest included VF is restricted to the maximum VF the
610 /// returned VPlan is valid for. If no VPlan can be built for the input range,
611 /// set the largest included VF to the maximum VF for which no plan could be
612 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
613 /// is a plain CFG VPlan wrapping the original scalar loop.
614 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
615 LoopVersioning *LVer);
616
617 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
618 /// according to the information gathered by Legal when it checked if it is
619 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
620 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
621
622 // Adjust the recipes for reductions. For in-loop reductions the chain of
623 // instructions leading from the loop exit instr to the phi needs to be
624 // converted to reductions, with one operand being vector and the other being
625 // the scalar reduction chain. For other reductions, a select is introduced
626 // between the phi and users outside the vector region when folding the tail.
627 void adjustRecipesForReductions(VPlanPtr &Plan,
628 VPRecipeBuilder &RecipeBuilder,
629 ElementCount MinVF);
630
631 /// Attach the runtime checks of \p RTChecks to \p Plan.
632 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
633 bool HasBranchWeights) const;
634
635#ifndef NDEBUG
636 /// \return The most profitable vectorization factor for the available VPlans
637 /// and the cost of that VF.
638 /// This is now only used to verify the decisions by the new VPlan-based
639 /// cost-model and will be retired once the VPlan-based cost-model is
640 /// stabilized.
641 VectorizationFactor selectVectorizationFactor();
642#endif
643
644 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
645 /// that of B.
646 bool isMoreProfitable(const VectorizationFactor &A,
647 const VectorizationFactor &B, bool HasTail,
648 bool IsEpilogue = false) const;
649
650 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
651 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
652 bool isMoreProfitable(const VectorizationFactor &A,
653 const VectorizationFactor &B,
654 const unsigned MaxTripCount, bool HasTail,
655 bool IsEpilogue = false) const;
656
657 /// Determines if we have the infrastructure to vectorize the loop and its
658 /// epilogue, assuming the main loop is vectorized by \p VF.
659 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
660};
661
662} // namespace llvm
663
664#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:310
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC)
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1592
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1643
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1576
VectorizationFactor computeBestVF()
Compute and return the most profitable vectorization factor.
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1557
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1721
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This class represents an analyzed expression in the program.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3823
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3850
iterator end()
Definition VPlan.h:3860
VPlan * getPlan()
Definition VPlan.cpp:165
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
VPBasicBlock::iterator getInsertPoint() const
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPBuilder(VPRecipeBase *InsertPt)
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3644
Recipe to expand a SCEV expression.
Definition VPlan.h:3442
Class to record and manage LLVM IR flags.
Definition VPlan.h:596
Helper to manage IR metadata for recipes.
Definition VPlan.h:938
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1192
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:979
@ VScale
Returns the value for vscale.
Definition VPlan.h:1068
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
VPBasicBlock * getParent()
Definition VPlan.h:407
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:3713
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:186
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1494
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4141
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4410
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
cl::opt< unsigned > ForceTargetInstructionCost
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
Definition VPlan.cpp:56
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)