LLVM 22.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions on how to vectorize the given loop. In particular, they represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
43class LoopVersioning;
47class VPRecipeBuilder;
48struct VFRange;
49
52
53/// VPlan-based builder utility analogous to IRBuilder.
54class VPBuilder {
55 VPBasicBlock *BB = nullptr;
57
58 /// Insert \p R in BB at InsertPt if BB is set; otherwise \p R is not
/// inserted anywhere. \returns \p R in both cases.
59 template <typename T> T *tryInsertInstruction(T *R) {
60 if (BB)
61 BB->insert(R, InsertPt);
62 return R;
63 }
64
/// Create a VPInstruction from \p Opcode, \p Operands, \p MD, \p DL and
/// \p Name, passing empty (`{}`) flags, and hand it to
/// tryInsertInstruction(). \returns the newly created instruction.
65 VPInstruction *createInstruction(unsigned Opcode,
66 ArrayRef<VPValue *> Operands,
67 const VPIRMetadata &MD, DebugLoc DL,
68 const Twine &Name = "") {
69 return tryInsertInstruction(
70 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
71 }
72
73public:
/// Create a builder with no insertion point set (BB stays null).
74 VPBuilder() = default;
/// Create a builder whose insertion point is set from the block \p InsertBB.
75 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
/// Create a builder whose insertion point is set from the recipe \p InsertPt.
76 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
80
81 /// Clear the insertion point: created instructions will not be inserted into
82 /// a block.
84 BB = nullptr;
85 InsertPt = VPBasicBlock::iterator();
86 }
87
/// \return the block new recipes are inserted into, or nullptr if no
/// insertion point is set.
88 VPBasicBlock *getInsertBlock() const { return BB; }
/// \return the position passed to the insert block when inserting new
/// recipes.
89 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
90
91 /// Create a VPBuilder to insert after \p R.
94 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
95 return B;
96 }
97
98 /// VPInsertPoint - A saved insertion point.
100 VPBasicBlock *Block = nullptr;
102
103 public:
104 /// Creates a new insertion point which doesn't point to anything.
105 VPInsertPoint() = default;
106
107 /// Creates a new insertion point at the given location.
109 : Block(InsertBlock), Point(InsertPoint) {}
110
111 /// Returns true if this insert point is set.
112 bool isSet() const { return Block != nullptr; }
113
/// \return the saved block; null when this insert point is not set.
114 VPBasicBlock *getBlock() const { return Block; }
/// \return the saved position within the block.
115 VPBasicBlock::iterator getPoint() const { return Point; }
116 };
117
118 /// Sets the current insert point to a previously-saved location.
120 if (IP.isSet())
121 setInsertPoint(IP.getBlock(), IP.getPoint());
122 else
124 }
125
126 /// This specifies that created VPInstructions should be appended to the end
127 /// of the specified block.
129 assert(TheBB && "Attempting to set a null insert point");
130 BB = TheBB;
131 InsertPt = BB->end();
132 }
133
134 /// This specifies that created instructions should be inserted at the
135 /// specified point.
137 BB = TheBB;
138 InsertPt = IP;
139 }
140
141 /// This specifies that created instructions should be inserted at the
142 /// specified point.
144 BB = IP->getParent();
145 InsertPt = IP->getIterator();
146 }
147
148 /// Insert \p R at the current insertion point. Unlike
/// tryInsertInstruction(), BB is dereferenced unconditionally here, so an
/// insert block must have been set before calling this.
149 void insert(VPRecipeBase *R) { BB->insert(R, InsertPt); }
150
151 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
152 /// its underlying Instruction.
154 Instruction *Inst = nullptr,
155 const VPIRFlags &Flags = {},
156 const VPIRMetadata &MD = {},
158 const Twine &Name = "") {
159 VPInstruction *NewVPInst = tryInsertInstruction(
160 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
161 NewVPInst->setUnderlyingValue(Inst);
162 return NewVPInst;
163 }
165 DebugLoc DL, const Twine &Name = "") {
166 return createInstruction(Opcode, Operands, {}, DL, Name);
167 }
169 const VPIRFlags &Flags,
171 const Twine &Name = "") {
172 return tryInsertInstruction(
173 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
174 }
175
177 Type *ResultTy, const VPIRFlags &Flags = {},
179 const Twine &Name = "") {
180 return tryInsertInstruction(new VPInstructionWithType(
181 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
182 }
183
185 unsigned Opcode, ArrayRef<VPValue *> Operands,
186 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
187 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
188 return tryInsertInstruction(
189 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
190 }
191
194 const Twine &Name = "") {
195 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
196 }
197
200 const Twine &Name = "") {
201 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
202 Name);
203 }
204
207 const Twine &Name = "") {
208
209 return tryInsertInstruction(new VPInstruction(
210 Instruction::BinaryOps::Or, {LHS, RHS},
211 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
212 }
213
219
221 createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal,
222 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
223 std::optional<FastMathFlags> FMFs = std::nullopt) {
224 if (!FMFs)
225 return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal}, DL,
226 Name);
227 return tryInsertInstruction(new VPInstruction(
228 Instruction::Select, {Cond, TrueVal, FalseVal}, *FMFs, {}, DL, Name));
229 }
230
231 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
232 /// and \p B.
235 const Twine &Name = "") {
237 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
238 return tryInsertInstruction(
239 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
240 }
241
242 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
243 /// and \p B.
246 const Twine &Name = "") {
248 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
249 return tryInsertInstruction(
250 new VPInstruction(Instruction::FCmp, {A, B}, Pred, {}, DL, Name));
251 }
252
255 const Twine &Name = "") {
256 return tryInsertInstruction(
258 GEPNoWrapFlags::none(), {}, DL, Name));
259 }
260
262 GEPNoWrapFlags GEPFlags,
264 const Twine &Name = "") {
265 return tryInsertInstruction(new VPInstruction(
266 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
267 }
268
271 const Twine &Name = "") {
272 return tryInsertInstruction(
274 GEPNoWrapFlags::none(), {}, DL, Name));
275 }
276
278 const Twine &Name = "") {
279 return tryInsertInstruction(new VPPhi(IncomingValues, DL, Name));
280 }
281
283 VPlan &Plan = *getInsertBlock()->getPlan();
284 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
285 if (EC.isScalable()) {
286 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
287 RuntimeEC = EC.getKnownMinValue() == 1
288 ? VScale
289 : createOverflowingOp(Instruction::Mul,
290 {VScale, RuntimeEC}, {true, false});
291 }
292 return RuntimeEC;
293 }
294
295 /// Convert the input value \p Current to the corresponding value of an
296 /// induction with \p Start and \p Step values, using \p Start + \p Current *
297 /// \p Step.
299 FPMathOperator *FPBinOp, VPValue *Start,
300 VPValue *Current, VPValue *Step,
301 const Twine &Name = "") {
302 return tryInsertInstruction(
303 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
304 }
305
307 Type *ResultTy, DebugLoc DL,
308 const VPIRFlags &Flags = {},
309 const VPIRMetadata &Metadata = {}) {
310 return tryInsertInstruction(
311 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
312 }
313
315 DebugLoc DL) {
316 if (ResultTy == SrcTy)
317 return Op;
318 Instruction::CastOps CastOp =
319 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
320 ? Instruction::Trunc
321 : Instruction::ZExt;
322 return createScalarCast(CastOp, Op, ResultTy, DL);
323 }
324
326 Type *ResultTy) {
327 VPIRFlags Flags;
328 if (Opcode == Instruction::Trunc)
329 Flags = VPIRFlags::TruncFlagsTy(false, false);
330 else if (Opcode == Instruction::ZExt)
331 Flags = VPIRFlags::NonNegFlagsTy(false);
332 return tryInsertInstruction(
333 new VPWidenCastRecipe(Opcode, Op, ResultTy, nullptr, Flags));
334 }
335
338 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
339 VPValue *VF, DebugLoc DL) {
340 return tryInsertInstruction(new VPScalarIVStepsRecipe(
341 IV, Step, VF, InductionOpcode,
342 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
343 }
344
346 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
347 }
348
349 //===--------------------------------------------------------------------===//
350 // RAII helpers.
351 //===--------------------------------------------------------------------===//
352
353 /// RAII object that stores the current insertion point and restores it when
354 /// the object is destroyed.
356 VPBuilder &Builder;
357 VPBasicBlock *Block;
359
360 public:
362 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
363
366
/// Restore the builder's insertion point to the block and position that
/// were captured when this guard was constructed.
367 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
368 };
369};
370
371/// TODO: The following VectorizationFactor was pulled out of
372/// LoopVectorizationCostModel class. LV also deals with
373/// VectorizerParams::VectorizationFactor.
374/// We need to streamline them.
375
376/// Information about vectorization costs.
378 /// Vector width with best cost.
380
381 /// Cost of the loop with that width.
383
384 /// Cost of the scalar loop.
386
387 /// The minimum trip count required to make vectorization profitable, e.g. due
388 /// to runtime checks.
390
394
395 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
397 return {ElementCount::getFixed(1), 0, 0};
398 }
399
/// Two factors compare equal when both Width and Cost match; no other
/// member takes part in the comparison.
400 bool operator==(const VectorizationFactor &rhs) const {
401 return Width == rhs.Width && Cost == rhs.Cost;
402 }
403
/// Negation of operator==.
404 bool operator!=(const VectorizationFactor &rhs) const {
405 return !(*this == rhs);
406 }
407};
408
409/// A class that represents two vectorization factors (initialized with 0 by
410/// default). One for fixed-width vectorization and one for scalable
411/// vectorization. This can be used by the vectorizer to choose from a range of
412/// fixed and/or scalable VFs in order to find the most cost-effective VF to
413/// vectorize with.
417
419 : FixedVF(ElementCount::getFixed(0)),
420 ScalableVF(ElementCount::getScalable(0)) {}
422 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
423 }
427 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
428 "Invalid scalable properties");
429 }
430
432
433 /// \return true if either fixed- or scalable VF is non-zero.
434 explicit operator bool() const { return FixedVF || ScalableVF; }
435
436 /// \return true if either fixed- or scalable VF is a valid vector VF.
437 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
438};
439
440/// Planner drives the vectorization process after having passed
441/// Legality checks.
443 /// The loop that we evaluate.
444 Loop *OrigLoop;
445
446 /// Loop Info analysis.
447 LoopInfo *LI;
448
449 /// The dominator tree.
450 DominatorTree *DT;
451
452 /// Target Library Info.
453 const TargetLibraryInfo *TLI;
454
455 /// Target Transform Info.
456 const TargetTransformInfo &TTI;
457
458 /// The legality analysis.
460
461 /// The profitability analysis.
463
464 /// The interleaved access analysis.
466
468
469 const LoopVectorizeHints &Hints;
470
472
474
475 /// Profitable vector factors.
477
478 /// A builder used to construct the current plan.
479 VPBuilder Builder;
480
481 /// Computes the cost of \p Plan for vectorization factor \p VF.
482 ///
483 /// The current implementation requires access to the
484 /// LoopVectorizationLegality to handle inductions and reductions, which is
485 /// why it is kept separate from the VPlan-only cost infrastructure.
486 ///
487 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
488 /// been retired.
489 InstructionCost cost(VPlan &Plan, ElementCount VF) const;
490
491 /// Precompute costs for certain instructions using the legacy cost model. The
492 /// function is used to bring up the VPlan-based cost model to initially avoid
493 /// taking different decisions due to inaccuracies in the legacy cost model.
494 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
495 VPCostContext &CostCtx) const;
496
497public:
499 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
504 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
505 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
506
507 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
508 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
509 /// interleaving should be avoided up-front, no plans are generated.
510 void plan(ElementCount UserVF, unsigned UserIC);
511
512 /// Use the VPlan-native path to plan how to best vectorize, return the best
513 /// VF and its cost.
515
516 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
517 /// for each VF.
518 VPlan &getPlanFor(ElementCount VF) const;
519
520 /// Compute and return the most profitable vectorization factor. Also collect
521 /// all profitable VFs in ProfitableVFs.
523
524 /// \return The desired interleave count.
525 /// If interleave count has been specified by metadata it will be returned.
526 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
527 /// are the selected vectorization factor and the cost of the selected VF.
528 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
529 InstructionCost LoopCost);
530
531 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
532 /// according to the best selected \p VF and \p UF.
533 ///
534 /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
535 /// epilogue vector loop. It should be removed once the re-use issue has been
536 /// fixed.
537 ///
538 /// Returns a mapping of SCEVs to their expanded IR values.
539 /// Note that this is a temporary workaround needed due to the current
540 /// epilogue handling.
542 VPlan &BestPlan,
544 DominatorTree *DT,
545 bool VectorizingEpilogue);
546
547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
548 void printPlans(raw_ostream &O);
549#endif
550
551 /// Look through the existing plans and return true if we have one with
552 /// vectorization factor \p VF.
554 return any_of(VPlans,
555 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
556 }
557
558 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
559 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
560 /// returned value holds for the entire \p Range.
561 static bool
562 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
563 VFRange &Range);
564
565 /// \return The most profitable vectorization factor and the cost of that VF
566 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
567 /// epilogue vectorization is not supported for the loop.
569 selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
570
571 /// Emit remarks for recipes with invalid costs in the available VPlans.
573
574 /// Add a check to \p Plan to see if the vector loop should be executed
575 /// based on its trip count.
576 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
577 ElementCount MinProfitableTripCount) const;
578
579 /// Update loop metadata and profile info for both the scalar remainder loop
580 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
581 /// loop on the vector loop and replaces vectorizer-specific metadata. The
582 /// loop ID of the original loop \p OrigLoopID must be passed, together with
583 /// the average trip count and invocation weight of the original loop (\p
584 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
585 /// cannot be retrieved after the plan has been executed, as the original loop
586 /// may have been removed.
588 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
589 bool VectorizingEpilogue, MDNode *OrigLoopID,
590 std::optional<unsigned> OrigAverageTripCount,
591 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
592 bool DisableRuntimeUnroll);
593
594protected:
595 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
596 /// according to the information gathered by Legal when it checked if it is
597 /// legal to vectorize the loop.
598 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
599
600private:
601 /// Build a VPlan according to the information gathered by Legal. \return a
602 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
603 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
604 /// the input range, set the largest included VF to the maximum VF for which
605 /// no plan could be built.
606 VPlanPtr tryToBuildVPlan(VFRange &Range);
607
608 /// Build a VPlan using VPRecipes according to the information gathered by
609 /// Legal. This method is only used for the legacy inner loop vectorizer.
610 /// \p Range's largest included VF is restricted to the maximum VF the
611 /// returned VPlan is valid for. If no VPlan can be built for the input range,
612 /// set the largest included VF to the maximum VF for which no plan could be
613 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
614 /// is a plain CFG VPlan wrapping the original scalar loop.
615 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
616 LoopVersioning *LVer);
617
618 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
619 /// according to the information gathered by Legal when it checked if it is
620 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
621 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
622
623 /// Adjust the recipes for reductions. For in-loop reductions the chain of
624 /// instructions leading from the loop exit instr to the phi needs to be
625 /// converted to reductions, with one operand being vector and the other being
626 /// the scalar reduction chain. For other reductions, a select is introduced
627 /// between the phi and users outside the vector region when folding the tail.
628 void adjustRecipesForReductions(VPlanPtr &Plan,
629 VPRecipeBuilder &RecipeBuilder,
630 ElementCount MinVF);
631
632 /// Attach the runtime checks of \p RTChecks to \p Plan.
633 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
634 bool HasBranchWeights) const;
635
636#ifndef NDEBUG
637 /// \return The most profitable vectorization factor for the available VPlans
638 /// and the cost of that VF.
639 /// This is now only used to verify the decisions by the new VPlan-based
640 /// cost-model and will be retired once the VPlan-based cost-model is
641 /// stabilized.
642 VectorizationFactor selectVectorizationFactor();
643#endif
644
645 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
646 /// that of B.
647 bool isMoreProfitable(const VectorizationFactor &A,
648 const VectorizationFactor &B, bool HasTail,
649 bool IsEpilogue = false) const;
650
651 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
652 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
653 bool isMoreProfitable(const VectorizationFactor &A,
654 const VectorizationFactor &B,
655 const unsigned MaxTripCount, bool HasTail,
656 bool IsEpilogue = false) const;
657
658 /// Determines if we have the infrastructure to vectorize the loop and its
659 /// epilogue, assuming the main loop is vectorized by \p VF.
660 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
661};
662
663} // namespace llvm
664
665#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC)
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1576
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1627
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1560
VectorizationFactor computeBestVF()
Compute and return the most profitable vectorization factor.
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1541
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1705
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This class represents an analyzed expression in the program.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3983
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4010
iterator end()
Definition VPlan.h:4020
VPlan * getPlan()
Definition VPlan.cpp:161
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={})
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", std::optional< FastMathFlags > FMFs=std::nullopt)
VPBasicBlock::iterator getInsertPoint() const
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder(VPRecipeBase *InsertPt)
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL, const Twine &Name="")
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3733
Recipe to expand a SCEV expression.
Definition VPlan.h:3526
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1241
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1031
@ VScale
Returns the value for vscale.
Definition VPlan.h:1120
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
VPBasicBlock * getParent()
Definition VPlan.h:407
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:3803
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1545
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4301
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4569
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
cl::opt< unsigned > ForceTargetInstructionCost
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)