LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
/// LoopVectorizationPlanner - drives the vectorization process after having
/// passed Legality checks.
/// The planner builds and optimizes the Vectorization Plans, which record the
/// decisions on how to vectorize the given loop. In particular, they represent
/// the control-flow of the vectorized version, the replication of instructions
/// that are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
43class LoopVersioning;
47class VPRecipeBuilder;
48struct VPRegisterUsage;
49struct VFRange;
50
53
54/// VPlan-based builder utility analogous to IRBuilder.
55class VPBuilder {
56 VPBasicBlock *BB = nullptr;
58
59 /// Insert \p VPI in BB at InsertPt if BB is set.
60 template <typename T> T *tryInsertInstruction(T *R) {
61 if (BB)
62 BB->insert(R, InsertPt);
63 return R;
64 }
65
66 VPInstruction *createInstruction(unsigned Opcode,
67 ArrayRef<VPValue *> Operands,
68 const VPIRMetadata &MD, DebugLoc DL,
69 const Twine &Name = "") {
70 return tryInsertInstruction(
71 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
72 }
73
74public:
75 VPBuilder() = default;
76 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
77 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
81
82 /// Clear the insertion point: created instructions will not be inserted into
83 /// a block.
85 BB = nullptr;
86 InsertPt = VPBasicBlock::iterator();
87 }
88
89 VPBasicBlock *getInsertBlock() const { return BB; }
90 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
91
92 /// Create a VPBuilder to insert after \p R.
95 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
96 return B;
97 }
98
/// VPInsertPoint - A saved insertion point.
101 VPBasicBlock *Block = nullptr;
103
104 public:
105 /// Creates a new insertion point which doesn't point to anything.
106 VPInsertPoint() = default;
107
108 /// Creates a new insertion point at the given location.
110 : Block(InsertBlock), Point(InsertPoint) {}
111
112 /// Returns true if this insert point is set.
113 bool isSet() const { return Block != nullptr; }
114
115 VPBasicBlock *getBlock() const { return Block; }
116 VPBasicBlock::iterator getPoint() const { return Point; }
117 };
118
119 /// Sets the current insert point to a previously-saved location.
121 if (IP.isSet())
122 setInsertPoint(IP.getBlock(), IP.getPoint());
123 else
125 }
126
127 /// This specifies that created VPInstructions should be appended to the end
128 /// of the specified block.
130 assert(TheBB && "Attempting to set a null insert point");
131 BB = TheBB;
132 InsertPt = BB->end();
133 }
134
135 /// This specifies that created instructions should be inserted at the
136 /// specified point.
138 BB = TheBB;
139 InsertPt = IP;
140 }
141
142 /// This specifies that created instructions should be inserted at the
143 /// specified point.
145 BB = IP->getParent();
146 InsertPt = IP->getIterator();
147 }
148
149 /// Insert \p R at the current insertion point.
150 void insert(VPRecipeBase *R) { BB->insert(R, InsertPt); }
151
152 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
153 /// its underlying Instruction.
155 Instruction *Inst = nullptr,
156 const VPIRFlags &Flags = {},
157 const VPIRMetadata &MD = {},
159 const Twine &Name = "") {
160 VPInstruction *NewVPInst = tryInsertInstruction(
161 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
162 NewVPInst->setUnderlyingValue(Inst);
163 return NewVPInst;
164 }
166 DebugLoc DL, const Twine &Name = "") {
167 return createInstruction(Opcode, Operands, {}, DL, Name);
168 }
170 const VPIRFlags &Flags,
172 const Twine &Name = "") {
173 return tryInsertInstruction(
174 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
175 }
176
178 Type *ResultTy, const VPIRFlags &Flags = {},
180 const Twine &Name = "") {
181 return tryInsertInstruction(new VPInstructionWithType(
182 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
183 }
184
186 unsigned Opcode, ArrayRef<VPValue *> Operands,
187 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
188 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
189 return tryInsertInstruction(
190 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
191 }
192
195 const Twine &Name = "") {
196 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
197 }
198
201 const Twine &Name = "") {
202 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
203 Name);
204 }
205
208 const Twine &Name = "") {
209
210 return tryInsertInstruction(new VPInstruction(
211 Instruction::BinaryOps::Or, {LHS, RHS},
212 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
213 }
214
217 const Twine &Name = "",
218 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
219 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
220 Name);
221 }
222
223 VPInstruction *
225 const Twine &Name = "",
226 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
227 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
228 Name);
229 }
230
236
242
244 VPValue *FalseVal,
246 const Twine &Name = "",
247 const VPIRFlags &Flags = {}) {
248 return tryInsertInstruction(new VPInstruction(
249 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
250 }
251
252 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
253 /// and \p B.
256 const Twine &Name = "") {
258 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
259 return tryInsertInstruction(
260 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
261 }
262
263 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
264 /// and \p B.
267 const Twine &Name = "") {
269 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
270 return tryInsertInstruction(
271 new VPInstruction(Instruction::FCmp, {A, B},
272 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
273 }
274
277 const Twine &Name = "") {
278 return tryInsertInstruction(
280 GEPNoWrapFlags::none(), {}, DL, Name));
281 }
282
284 GEPNoWrapFlags GEPFlags,
286 const Twine &Name = "") {
287 return tryInsertInstruction(new VPInstruction(
288 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
289 }
290
293 const Twine &Name = "") {
294 return tryInsertInstruction(
296 GEPNoWrapFlags::none(), {}, DL, Name));
297 }
298
301 const Twine &Name = "", const VPIRFlags &Flags = {}) {
302 return tryInsertInstruction(new VPPhi(IncomingValues, Flags, DL, Name));
303 }
304
306 VPlan &Plan = *getInsertBlock()->getPlan();
307 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
308 if (EC.isScalable()) {
309 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
310 RuntimeEC = EC.getKnownMinValue() == 1
311 ? VScale
312 : createOverflowingOp(Instruction::Mul,
313 {VScale, RuntimeEC}, {true, false});
314 }
315 return RuntimeEC;
316 }
317
318 /// Convert the input value \p Current to the corresponding value of an
319 /// induction with \p Start and \p Step values, using \p Start + \p Current *
320 /// \p Step.
322 FPMathOperator *FPBinOp, VPIRValue *Start,
323 VPValue *Current, VPValue *Step,
324 const Twine &Name = "") {
325 return tryInsertInstruction(
326 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
327 }
328
330 DebugLoc DL,
331 const VPIRMetadata &Metadata = {}) {
332 return tryInsertInstruction(new VPInstructionWithType(
333 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
334 }
335
337 Type *ResultTy, DebugLoc DL,
338 const VPIRMetadata &Metadata = {}) {
339 return tryInsertInstruction(new VPInstructionWithType(
340 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
341 DL));
342 }
343
345 Type *ResultTy, DebugLoc DL,
346 const VPIRFlags &Flags,
347 const VPIRMetadata &Metadata = {}) {
348 return tryInsertInstruction(
349 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
350 }
351
353 DebugLoc DL) {
354 if (ResultTy == SrcTy)
355 return Op;
356 Instruction::CastOps CastOp =
357 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
358 ? Instruction::Trunc
359 : Instruction::ZExt;
360 return createScalarCast(CastOp, Op, ResultTy, DL);
361 }
362
364 DebugLoc DL) {
365 if (ResultTy == SrcTy)
366 return Op;
367 Instruction::CastOps CastOp =
368 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
369 ? Instruction::Trunc
370 : Instruction::SExt;
371 return createScalarCast(CastOp, Op, ResultTy, DL);
372 }
373
375 Type *ResultTy) {
376 return tryInsertInstruction(new VPWidenCastRecipe(
377 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
378 }
379
382 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
383 VPValue *VF, DebugLoc DL) {
384 return tryInsertInstruction(new VPScalarIVStepsRecipe(
385 IV, Step, VF, InductionOpcode,
386 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
387 }
388
390 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
391 }
392
393 //===--------------------------------------------------------------------===//
394 // RAII helpers.
395 //===--------------------------------------------------------------------===//
396
397 /// RAII object that stores the current insertion point and restores it when
398 /// the object is destroyed.
400 VPBuilder &Builder;
401 VPBasicBlock *Block;
403
404 public:
406 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
407
410
411 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
412 };
413};
414
415/// TODO: The following VectorizationFactor was pulled out of
416/// LoopVectorizationCostModel class. LV also deals with
417/// VectorizerParams::VectorizationFactor.
418/// We need to streamline them.
419
420/// Information about vectorization costs.
422 /// Vector width with best cost.
424
425 /// Cost of the loop with that width.
427
428 /// Cost of the scalar loop.
430
431 /// The minimum trip count required to make vectorization profitable, e.g. due
432 /// to runtime checks.
434
438
439 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
441 return {ElementCount::getFixed(1), 0, 0};
442 }
443
444 bool operator==(const VectorizationFactor &rhs) const {
445 return Width == rhs.Width && Cost == rhs.Cost;
446 }
447
448 bool operator!=(const VectorizationFactor &rhs) const {
449 return !(*this == rhs);
450 }
451};
452
453/// A class that represents two vectorization factors (initialized with 0 by
454/// default). One for fixed-width vectorization and one for scalable
455/// vectorization. This can be used by the vectorizer to choose from a range of
456/// fixed and/or scalable VFs in order to find the most cost-effective VF to
457/// vectorize with.
461
463 : FixedVF(ElementCount::getFixed(0)),
464 ScalableVF(ElementCount::getScalable(0)) {}
466 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
467 }
471 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
472 "Invalid scalable properties");
473 }
474
476
477 /// \return true if either fixed- or scalable VF is non-zero.
478 explicit operator bool() const { return FixedVF || ScalableVF; }
479
480 /// \return true if either fixed- or scalable VF is a valid vector VF.
481 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
482};
483
484/// Planner drives the vectorization process after having passed
485/// Legality checks.
487 /// The loop that we evaluate.
488 Loop *OrigLoop;
489
490 /// Loop Info analysis.
491 LoopInfo *LI;
492
493 /// The dominator tree.
494 DominatorTree *DT;
495
496 /// Target Library Info.
497 const TargetLibraryInfo *TLI;
498
499 /// Target Transform Info.
500 const TargetTransformInfo &TTI;
501
502 /// The legality analysis.
504
505 /// The profitability analysis.
507
508 /// The interleaved access analysis.
510
512
513 const LoopVectorizeHints &Hints;
514
516
518
519 /// Profitable vector factors.
521
522 /// A builder used to construct the current plan.
523 VPBuilder Builder;
524
525 /// Computes the cost of \p Plan for vectorization factor \p VF.
526 ///
527 /// The current implementation requires access to the
528 /// LoopVectorizationLegality to handle inductions and reductions, which is
529 /// why it is kept separate from the VPlan-only cost infrastructure.
530 ///
531 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
532 /// been retired.
533 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
534
535 /// Precompute costs for certain instructions using the legacy cost model. The
536 /// function is used to bring up the VPlan-based cost model to initially avoid
537 /// taking different decisions due to inaccuracies in the legacy cost model.
538 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
539 VPCostContext &CostCtx) const;
540
541public:
543 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
548 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
549 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
550
551 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
552 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
553 /// interleaving should be avoided up-front, no plans are generated.
554 void plan(ElementCount UserVF, unsigned UserIC);
555
556 /// Use the VPlan-native path to plan how to best vectorize, return the best
557 /// VF and its cost.
559
560 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
561 /// for each VF.
562 VPlan &getPlanFor(ElementCount VF) const;
563
564 /// Compute and return the most profitable vectorization factor. Also collect
565 /// all profitable VFs in ProfitableVFs.
567
568 /// \return The desired interleave count.
569 /// If interleave count has been specified by metadata it will be returned.
570 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
571 /// are the selected vectorization factor and the cost of the selected VF.
572 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
573 InstructionCost LoopCost);
574
575 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
576 /// according to the best selected \p VF and \p UF.
577 ///
578 /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
579 /// epilogue vector loop. It should be removed once the re-use issue has been
580 /// fixed.
581 ///
582 /// Returns a mapping of SCEVs to their expanded IR values.
583 /// Note that this is a temporary workaround needed due to the current
584 /// epilogue handling.
586 VPlan &BestPlan,
588 DominatorTree *DT,
589 bool VectorizingEpilogue);
590
591#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
592 void printPlans(raw_ostream &O);
593#endif
594
595 /// Look through the existing plans and return true if we have one with
596 /// vectorization factor \p VF.
598 return any_of(VPlans,
599 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
600 }
601
602 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
603 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
604 /// returned value holds for the entire \p Range.
605 static bool
606 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
607 VFRange &Range);
608
609 /// \return The most profitable vectorization factor and the cost of that VF
610 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
611 /// epilogue vectorization is not supported for the loop.
613 unsigned IC);
614
615 /// Emit remarks for recipes with invalid costs in the available VPlans.
617
/// Add a check to \p Plan to see if the vector loop should be executed
/// based on its trip count.
620 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
621 ElementCount MinProfitableTripCount) const;
622
623 /// Update loop metadata and profile info for both the scalar remainder loop
624 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
625 /// loop on the vector loop and replaces vectorizer-specific metadata. The
626 /// loop ID of the original loop \p OrigLoopID must be passed, together with
627 /// the average trip count and invocation weight of the original loop (\p
628 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
629 /// cannot be retrieved after the plan has been executed, as the original loop
630 /// may have been removed.
632 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
633 bool VectorizingEpilogue, MDNode *OrigLoopID,
634 std::optional<unsigned> OrigAverageTripCount,
635 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
636 bool DisableRuntimeUnroll);
637
638protected:
639 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
640 /// according to the information gathered by Legal when it checked if it is
641 /// legal to vectorize the loop.
642 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
643
644private:
645 /// Build a VPlan according to the information gathered by Legal. \return a
646 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
647 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
648 /// the input range, set the largest included VF to the maximum VF for which
649 /// no plan could be built.
650 VPlanPtr tryToBuildVPlan(VFRange &Range);
651
/// Build a VPlan using VPRecipes according to the information gathered by
653 /// Legal. This method is only used for the legacy inner loop vectorizer.
654 /// \p Range's largest included VF is restricted to the maximum VF the
655 /// returned VPlan is valid for. If no VPlan can be built for the input range,
656 /// set the largest included VF to the maximum VF for which no plan could be
657 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
658 /// is a plain CFG VPlan wrapping the original scalar loop.
659 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
660 LoopVersioning *LVer);
661
662 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
663 /// according to the information gathered by Legal when it checked if it is
664 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
665 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
666
667 /// Add recipes to compute the final reduction result (ComputeAnyOfResult,
668 /// ComputeReductionResult depending on the reduction) in
669 /// the middle block. Selects are introduced for reductions between the phi
670 /// and users outside the vector region when folding the tail.
671 void addReductionResultComputation(VPlanPtr &Plan,
672 VPRecipeBuilder &RecipeBuilder,
673 ElementCount MinVF);
674
675 /// Attach the runtime checks of \p RTChecks to \p Plan.
676 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
677 bool HasBranchWeights) const;
678
679#ifndef NDEBUG
680 /// \return The most profitable vectorization factor for the available VPlans
681 /// and the cost of that VF.
682 /// This is now only used to verify the decisions by the new VPlan-based
683 /// cost-model and will be retired once the VPlan-based cost-model is
684 /// stabilized.
685 VectorizationFactor selectVectorizationFactor();
686#endif
687
688 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
689 /// that of B.
690 bool isMoreProfitable(const VectorizationFactor &A,
691 const VectorizationFactor &B, bool HasTail,
692 bool IsEpilogue = false) const;
693
694 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
695 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
696 bool isMoreProfitable(const VectorizationFactor &A,
697 const VectorizationFactor &B,
698 const unsigned MaxTripCount, bool HasTail,
699 bool IsEpilogue = false) const;
700
701 /// Determines if we have the infrastructure to vectorize the loop and its
702 /// epilogue, assuming the main loop is vectorized by \p VF.
703 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
704};
705
706} // namespace llvm
707
708#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1638
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1689
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1622
VectorizationFactor computeBestVF()
Compute and return the most profitable vectorization factor.
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1603
VectorizationFactor selectEpilogueVectorizationFactor(ElementCount MainLoopVF, unsigned IC)
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1783
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This class represents an analyzed expression in the program.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4255
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4282
iterator end()
Definition VPlan.h:4292
VPlan * getPlan()
Definition VPlan.cpp:177
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBuilder(VPRecipeBase *InsertPt)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3987
Recipe to expand a SCEV expression.
Definition VPlan.h:3779
Class to record and manage LLVM IR flags.
Definition VPlan.h:690
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1156
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1505
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1211
@ VScale
Returns the value for vscale.
Definition VPlan.h:1325
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:406
VPBasicBlock * getParent()
Definition VPlan.h:481
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4059
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1826
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4573
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4863
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
cl::opt< unsigned > ForceTargetInstructionCost
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:78
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:207
A struct that represents some properties of the register usage of a loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)