LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans, which record the
15/// decisions on how to vectorize the given loop. In particular, they represent
16/// the control-flow of the vectorized version, the replication of instructions
17/// that are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
30
31namespace {
32class GeneratedRTChecks;
33}
34
35namespace llvm {
36
37class LoopInfo;
38class DominatorTree;
43class LoopVersioning;
47class VPRecipeBuilder;
48struct VFRange;
49
52
53/// VPlan-based builder utility analogous to IRBuilder.
54class VPBuilder {
55 VPBasicBlock *BB = nullptr;
57
58 /// Insert \p R in BB at InsertPt if BB is set. \p R is returned either way.
59 template <typename T> T *tryInsertInstruction(T *R) {
60 if (BB)
61 BB->insert(R, InsertPt);
62 return R;
63 }
64
/// Create a VPInstruction from \p Opcode, \p Operands, \p MD, \p DL and
/// \p Name, passing an empty ({}) third constructor argument, and hand it to
/// tryInsertInstruction so it is inserted when an insert block is set.
65 VPInstruction *createInstruction(unsigned Opcode,
66 ArrayRef<VPValue *> Operands,
67 const VPIRMetadata &MD, DebugLoc DL,
68 const Twine &Name = "") {
69 return tryInsertInstruction(
70 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
71 }
72
73public:
74 VPBuilder() = default;
75 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
76 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
80
81 /// Clear the insertion point: created instructions will not be inserted into
82 /// a block.
84 BB = nullptr;
85 InsertPt = VPBasicBlock::iterator();
86 }
87
/// Returns the current insert block (BB); nullptr when no block is set.
88 VPBasicBlock *getInsertBlock() const { return BB; }
/// Returns the current insertion position (InsertPt) that is passed to
/// VPBasicBlock::insert when recipes are created.
89 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
90
91 /// Create a VPBuilder to insert after \p R.
94 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
95 return B;
96 }
97
98 /// InsertPoint - A saved insertion point.
100 VPBasicBlock *Block = nullptr;
102
103 public:
104 /// Creates a new insertion point which doesn't point to anything.
105 VPInsertPoint() = default;
106
107 /// Creates a new insertion point at the given location.
109 : Block(InsertBlock), Point(InsertPoint) {}
110
111 /// Returns true if this insert point is set.
112 bool isSet() const { return Block != nullptr; }
113
114 VPBasicBlock *getBlock() const { return Block; }
115 VPBasicBlock::iterator getPoint() const { return Point; }
116 };
117
118 /// Sets the current insert point to a previously-saved location.
120 if (IP.isSet())
121 setInsertPoint(IP.getBlock(), IP.getPoint());
122 else
124 }
125
126 /// This specifies that created VPInstructions should be appended to the end
127 /// of the specified block.
129 assert(TheBB && "Attempting to set a null insert point");
130 BB = TheBB;
131 InsertPt = BB->end();
132 }
133
134 /// This specifies that created instructions should be inserted at the
135 /// specified point.
137 BB = TheBB;
138 InsertPt = IP;
139 }
140
141 /// This specifies that created instructions should be inserted at the
142 /// specified point.
144 BB = IP->getParent();
145 InsertPt = IP->getIterator();
146 }
147
148 /// Insert \p R at the current insertion point.
/// Unlike tryInsertInstruction, BB is dereferenced here without a null check,
/// so an insert block must already be set.
149 void insert(VPRecipeBase *R) { BB->insert(R, InsertPt); }
150
151 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
152 /// its underlying Instruction.
154 Instruction *Inst = nullptr,
155 const VPIRFlags &Flags = {},
156 const VPIRMetadata &MD = {},
158 const Twine &Name = "") {
159 VPInstruction *NewVPInst = tryInsertInstruction(
160 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
161 NewVPInst->setUnderlyingValue(Inst);
162 return NewVPInst;
163 }
165 DebugLoc DL, const Twine &Name = "") {
166 return createInstruction(Opcode, Operands, {}, DL, Name);
167 }
169 const VPIRFlags &Flags,
171 const Twine &Name = "") {
172 return tryInsertInstruction(
173 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
174 }
175
177 Type *ResultTy, const VPIRFlags &Flags = {},
179 const Twine &Name = "") {
180 return tryInsertInstruction(new VPInstructionWithType(
181 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
182 }
183
185 unsigned Opcode, ArrayRef<VPValue *> Operands,
186 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
187 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
188 return tryInsertInstruction(
189 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
190 }
191
194 const Twine &Name = "") {
195 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
196 }
197
200 const Twine &Name = "") {
201 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
202 Name);
203 }
204
207 const Twine &Name = "") {
208
209 return tryInsertInstruction(new VPInstruction(
210 Instruction::BinaryOps::Or, {LHS, RHS},
211 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
212 }
213
216 const Twine &Name = "",
217 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
218 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
219 Name);
220 }
221
222 VPInstruction *
224 const Twine &Name = "",
225 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
226 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
227 Name);
228 }
229
235
241
243 VPValue *FalseVal,
245 const Twine &Name = "",
246 const VPIRFlags &Flags = {}) {
247 return tryInsertInstruction(new VPInstruction(
248 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
249 }
250
251 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
252 /// and \p B.
255 const Twine &Name = "") {
257 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
258 return tryInsertInstruction(
259 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
260 }
261
262 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
263 /// and \p B.
266 const Twine &Name = "") {
268 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
269 return tryInsertInstruction(
270 new VPInstruction(Instruction::FCmp, {A, B},
271 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
272 }
273
276 const Twine &Name = "") {
277 return tryInsertInstruction(
279 GEPNoWrapFlags::none(), {}, DL, Name));
280 }
281
283 GEPNoWrapFlags GEPFlags,
285 const Twine &Name = "") {
286 return tryInsertInstruction(new VPInstruction(
287 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
288 }
289
292 const Twine &Name = "") {
293 return tryInsertInstruction(
295 GEPNoWrapFlags::none(), {}, DL, Name));
296 }
297
300 const Twine &Name = "", const VPIRFlags &Flags = {}) {
301 return tryInsertInstruction(new VPPhi(IncomingValues, Flags, DL, Name));
302 }
303
305 VPlan &Plan = *getInsertBlock()->getPlan();
306 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
307 if (EC.isScalable()) {
308 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
309 RuntimeEC = EC.getKnownMinValue() == 1
310 ? VScale
311 : createOverflowingOp(Instruction::Mul,
312 {VScale, RuntimeEC}, {true, false});
313 }
314 return RuntimeEC;
315 }
316
317 /// Convert the input value \p Current to the corresponding value of an
318 /// induction with \p Start and \p Step values, using \p Start + \p Current *
319 /// \p Step.
321 FPMathOperator *FPBinOp, VPIRValue *Start,
322 VPValue *Current, VPValue *Step,
323 const Twine &Name = "") {
324 return tryInsertInstruction(
325 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step, Name));
326 }
327
329 DebugLoc DL,
330 const VPIRMetadata &Metadata = {}) {
331 return tryInsertInstruction(new VPInstructionWithType(
332 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
333 }
334
336 Type *ResultTy, DebugLoc DL,
337 const VPIRMetadata &Metadata = {}) {
338 return tryInsertInstruction(new VPInstructionWithType(
339 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
340 DL));
341 }
342
344 Type *ResultTy, DebugLoc DL,
345 const VPIRFlags &Flags,
346 const VPIRMetadata &Metadata = {}) {
347 return tryInsertInstruction(
348 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
349 }
350
352 DebugLoc DL) {
353 if (ResultTy == SrcTy)
354 return Op;
355 Instruction::CastOps CastOp =
356 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
357 ? Instruction::Trunc
358 : Instruction::ZExt;
359 return createScalarCast(CastOp, Op, ResultTy, DL);
360 }
361
363 DebugLoc DL) {
364 if (ResultTy == SrcTy)
365 return Op;
366 Instruction::CastOps CastOp =
367 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
368 ? Instruction::Trunc
369 : Instruction::SExt;
370 return createScalarCast(CastOp, Op, ResultTy, DL);
371 }
372
374 Type *ResultTy) {
375 return tryInsertInstruction(new VPWidenCastRecipe(
376 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
377 }
378
381 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
382 VPValue *VF, DebugLoc DL) {
383 return tryInsertInstruction(new VPScalarIVStepsRecipe(
384 IV, Step, VF, InductionOpcode,
385 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
386 }
387
389 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
390 }
391
392 //===--------------------------------------------------------------------===//
393 // RAII helpers.
394 //===--------------------------------------------------------------------===//
395
396 /// RAII object that stores the current insertion point and restores it when
397 /// the object is destroyed.
399 VPBuilder &Builder;
400 VPBasicBlock *Block;
402
403 public:
405 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
406
409
410 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
411 };
412};
413
414/// TODO: The following VectorizationFactor was pulled out of
415/// LoopVectorizationCostModel class. LV also deals with
416/// VectorizerParams::VectorizationFactor.
417/// We need to streamline them.
418
419/// Information about vectorization costs.
421 /// Vector width with best cost.
423
424 /// Cost of the loop with that width.
426
427 /// Cost of the scalar loop.
429
430 /// The minimum trip count required to make vectorization profitable, e.g. due
431 /// to runtime checks.
433
437
438 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
440 return {ElementCount::getFixed(1), 0, 0};
441 }
442
/// Two factors compare equal when both Width and Cost match; no other member
/// takes part in the comparison.
443 bool operator==(const VectorizationFactor &rhs) const {
444 return Width == rhs.Width && Cost == rhs.Cost;
445 }
446
/// Negation of operator==.
447 bool operator!=(const VectorizationFactor &rhs) const {
448 return !(*this == rhs);
449 }
450};
451
452/// A class that represents two vectorization factors (initialized with 0 by
453/// default). One for fixed-width vectorization and one for scalable
454/// vectorization. This can be used by the vectorizer to choose from a range of
455/// fixed and/or scalable VFs in order to find the most cost-effective VF to
456/// vectorize with.
460
462 : FixedVF(ElementCount::getFixed(0)),
463 ScalableVF(ElementCount::getScalable(0)) {}
465 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
466 }
470 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
471 "Invalid scalable properties");
472 }
473
475
476 /// \return true if either fixed- or scalable VF is non-zero.
477 explicit operator bool() const { return FixedVF || ScalableVF; }
478
479 /// \return true if either fixed- or scalable VF is a valid vector VF.
480 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
481};
482
483/// Planner drives the vectorization process after having passed
484/// Legality checks.
486 /// The loop that we evaluate.
487 Loop *OrigLoop;
488
489 /// Loop Info analysis.
490 LoopInfo *LI;
491
492 /// The dominator tree.
493 DominatorTree *DT;
494
495 /// Target Library Info.
496 const TargetLibraryInfo *TLI;
497
498 /// Target Transform Info.
499 const TargetTransformInfo &TTI;
500
501 /// The legality analysis.
503
504 /// The profitability analysis.
506
507 /// The interleaved access analysis.
509
511
512 const LoopVectorizeHints &Hints;
513
515
517
518 /// Profitable vector factors.
520
521 /// A builder used to construct the current plan.
522 VPBuilder Builder;
523
524 /// Computes the cost of \p Plan for vectorization factor \p VF.
525 ///
526 /// The current implementation requires access to the
527 /// LoopVectorizationLegality to handle inductions and reductions, which is
528 /// why it is kept separate from the VPlan-only cost infrastructure.
529 ///
530 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
531 /// been retired.
532 InstructionCost cost(VPlan &Plan, ElementCount VF) const;
533
534 /// Precompute costs for certain instructions using the legacy cost model. The
535 /// function is used to bring up the VPlan-based cost model to initially avoid
536 /// taking different decisions due to inaccuracies in the legacy cost model.
537 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
538 VPCostContext &CostCtx) const;
539
540public:
542 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
547 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
548 IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
549
550 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
551 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
552 /// interleaving should be avoided up-front, no plans are generated.
553 void plan(ElementCount UserVF, unsigned UserIC);
554
555 /// Use the VPlan-native path to plan how to best vectorize, return the best
556 /// VF and its cost.
558
559 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
560 /// for each VF.
561 VPlan &getPlanFor(ElementCount VF) const;
562
563 /// Compute and return the most profitable vectorization factor. Also collect
564 /// all profitable VFs in ProfitableVFs.
566
567 /// \return The desired interleave count.
568 /// If interleave count has been specified by metadata it will be returned.
569 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
570 /// are the selected vectorization factor and the cost of the selected VF.
571 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
572 InstructionCost LoopCost);
573
574 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
575 /// according to the best selected \p VF and \p UF.
576 ///
577 /// TODO: \p VectorizingEpilogue indicates if the executed VPlan is for the
578 /// epilogue vector loop. It should be removed once the re-use issue has been
579 /// fixed.
580 ///
581 /// Returns a mapping of SCEVs to their expanded IR values.
582 /// Note that this is a temporary workaround needed due to the current
583 /// epilogue handling.
585 VPlan &BestPlan,
587 DominatorTree *DT,
588 bool VectorizingEpilogue);
589
590#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
591 void printPlans(raw_ostream &O);
592#endif
593
594 /// Look through the existing plans and return true if we have one with
595 /// vectorization factor \p VF.
597 return any_of(VPlans,
598 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
599 }
600
601 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
602 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
603 /// returned value holds for the entire \p Range.
604 static bool
605 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
606 VFRange &Range);
607
608 /// \return The most profitable vectorization factor and the cost of that VF
609 /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
610 /// epilogue vectorization is not supported for the loop.
612 selectEpilogueVectorizationFactor(const ElementCount MainLoopVF, unsigned IC);
613
614 /// Emit remarks for recipes with invalid costs in the available VPlans.
616
617 /// Add a check to \p Plan that determines whether the vector loop should be
618 /// executed based on its trip count.
619 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
620 ElementCount MinProfitableTripCount) const;
621
622 /// Update loop metadata and profile info for both the scalar remainder loop
623 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
624 /// loop on the vector loop and replaces vectorizer-specific metadata. The
625 /// loop ID of the original loop \p OrigLoopID must be passed, together with
626 /// the average trip count and invocation weight of the original loop (\p
627 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
628 /// cannot be retrieved after the plan has been executed, as the original loop
629 /// may have been removed.
631 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
632 bool VectorizingEpilogue, MDNode *OrigLoopID,
633 std::optional<unsigned> OrigAverageTripCount,
634 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
635 bool DisableRuntimeUnroll);
636
637protected:
638 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
639 /// according to the information gathered by Legal when it checked if it is
640 /// legal to vectorize the loop.
641 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
642
643private:
644 /// Build a VPlan according to the information gathered by Legal. \return a
645 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
646 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
647 /// the input range, set the largest included VF to the maximum VF for which
648 /// no plan could be built.
649 VPlanPtr tryToBuildVPlan(VFRange &Range);
650
651 /// Build a VPlan using VPRecipes according to the information gathered by
652 /// Legal. This method is only used for the legacy inner loop vectorizer.
653 /// \p Range's largest included VF is restricted to the maximum VF the
654 /// returned VPlan is valid for. If no VPlan can be built for the input range,
655 /// set the largest included VF to the maximum VF for which no plan could be
656 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
657 /// is a plain CFG VPlan wrapping the original scalar loop.
658 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range,
659 LoopVersioning *LVer);
660
661 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
662 /// according to the information gathered by Legal when it checked if it is
663 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
664 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
665
666 /// Add recipes to compute the final reduction result (ComputeAnyOfResult,
667 /// ComputeReductionResult depending on the reduction) in
668 /// the middle block. Selects are introduced for reductions between the phi
669 /// and users outside the vector region when folding the tail.
670 void addReductionResultComputation(VPlanPtr &Plan,
671 VPRecipeBuilder &RecipeBuilder,
672 ElementCount MinVF);
673
674 /// Attach the runtime checks of \p RTChecks to \p Plan.
675 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
676 bool HasBranchWeights) const;
677
678#ifndef NDEBUG
679 /// \return The most profitable vectorization factor for the available VPlans
680 /// and the cost of that VF.
681 /// This is now only used to verify the decisions by the new VPlan-based
682 /// cost-model and will be retired once the VPlan-based cost-model is
683 /// stabilized.
684 VectorizationFactor selectVectorizationFactor();
685#endif
686
687 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
688 /// that of B.
689 bool isMoreProfitable(const VectorizationFactor &A,
690 const VectorizationFactor &B, bool HasTail,
691 bool IsEpilogue = false) const;
692
693 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
694 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
695 bool isMoreProfitable(const VectorizationFactor &A,
696 const VectorizationFactor &B,
697 const unsigned MaxTripCount, bool HasTail,
698 bool IsEpilogue = false) const;
699
700 /// Determines if we have the infrastructure to vectorize the loop and its
701 /// epilogue, assuming the main loop is vectorized by \p VF.
702 bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
703};
704
705} // namespace llvm
706
707#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
VectorizationFactor selectEpilogueVectorizationFactor(const ElementCount MainLoopVF, unsigned IC)
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1604
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1655
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VF's between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1588
VectorizationFactor computeBestVF()
Compute and return the most profitable vectorization factor.
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, bool VectorizingEpilogue)
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1569
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1749
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
This class represents an analyzed expression in the program.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4182
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4209
iterator end()
Definition VPlan.h:4219
VPlan * getPlan()
Definition VPlan.cpp:177
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
void insert(VPRecipeBase *R)
Insert R at the current insertion point.
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBuilder(VPRecipeBase *InsertPt)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3927
Recipe to expand a SCEV expression.
Definition VPlan.h:3719
Class to record and manage LLVM IR flags.
Definition VPlan.h:670
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1105
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1451
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1160
@ VScale
Returns the value for vscale.
Definition VPlan.h:1272
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
VPBasicBlock * getParent()
Definition VPlan.h:462
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:3999
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1767
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4500
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4778
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
cl::opt< unsigned > ForceTargetInstructionCost
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:183
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)