LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions on how to vectorize the given loop. In particular, the plans
16/// represent the control-flow of the vectorized version, the replication of
17/// instructions that are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
31
32namespace {
33class GeneratedRTChecks;
34}
35
36namespace llvm {
37
38class LoopInfo;
39class DominatorTree;
45class LoopVersioning;
48class VPRecipeBuilder;
49struct VPRegisterUsage;
50struct VFRange;
51
55
56/// \return An upper bound for vscale based on TTI or the vscale_range
57/// attribute.
58std::optional<unsigned> getMaxVScale(const Function &F,
60
61/// Reports an informative message: print \p Msg for debugging purposes as well
62/// as an optimization remark. Uses either \p I as location of the remark, or
63/// otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
64/// remark.
65void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
67 const Loop *TheLoop, Instruction *I = nullptr,
68 DebugLoc DL = {});
69
70/// VPlan-based builder utility analogous to IRBuilder.
71class VPBuilder {
72 VPBasicBlock *BB = nullptr;
74
75 /// Insert \p R in BB at InsertPt if BB is set; when BB is null, \p R is left
/// uninserted. \return \p R in either case, so the call can wrap a `new`
/// expression directly.
76 template <typename T> T *tryInsertInstruction(T *R) {
77 if (BB)
78 BB->insert(R, InsertPt);
79 return R;
80 }
81
/// Allocate a new VPInstruction from \p Opcode, \p Operands, metadata \p MD,
/// debug location \p DL and \p Name, and hand it to tryInsertInstruction.
/// The third constructor argument is passed as `{}`; the other
/// six-argument VPInstruction constructions in this class pass their flags
/// in that position.
/// \return the newly created VPInstruction.
82 VPInstruction *createInstruction(unsigned Opcode,
83 ArrayRef<VPValue *> Operands,
84 const VPIRMetadata &MD, DebugLoc DL,
85 const Twine &Name = "") {
86 return tryInsertInstruction(
87 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
88 }
89
90public:
91 VPBuilder() = default;
92 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
93 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
97
98 /// Clear the insertion point: created instructions will not be inserted into
99 /// a block.
101 BB = nullptr;
102 InsertPt = VPBasicBlock::iterator();
103 }
104
/// \return the block new recipes are currently inserted into (the BB
/// member); null when no insertion block is set.
105 VPBasicBlock *getInsertBlock() const { return BB; }
/// \return the iterator (the InsertPt member) that is passed to
/// BB->insert() when a recipe is inserted.
106 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
107
108 /// Create a VPBuilder to insert after \p R.
110 VPBuilder B;
111 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
112 return B;
113 }
114
115 /// InsertPoint - A saved insertion point.
117 VPBasicBlock *Block = nullptr;
119
120 public:
121 /// Creates a new insertion point which doesn't point to anything.
122 VPInsertPoint() = default;
123
124 /// Creates a new insertion point at the given location.
126 : Block(InsertBlock), Point(InsertPoint) {}
127
128 /// Returns true if this insert point is set.
129 bool isSet() const { return Block != nullptr; }
130
131 VPBasicBlock *getBlock() const { return Block; }
132 VPBasicBlock::iterator getPoint() const { return Point; }
133 };
134
135 /// Sets the current insert point to a previously-saved location.
137 if (IP.isSet())
138 setInsertPoint(IP.getBlock(), IP.getPoint());
139 else
141 }
142
143 /// This specifies that created VPInstructions should be appended to the end
144 /// of the specified block.
146 assert(TheBB && "Attempting to set a null insert point");
147 BB = TheBB;
148 InsertPt = BB->end();
149 }
150
151 /// This specifies that created instructions should be inserted at the
152 /// specified point.
154 BB = TheBB;
155 InsertPt = IP;
156 }
157
158 /// This specifies that created instructions should be inserted at the
159 /// specified point.
161 BB = IP->getParent();
162 InsertPt = IP->getIterator();
163 }
164
165 /// Insert \p R at the current insertion point. Returns \p R unchanged.
/// Unlike tryInsertInstruction, BB is dereferenced without a null check, so
/// an insertion block must already be set when this is called.
166 template <typename T> [[maybe_unused]] T *insert(T *R) {
167 BB->insert(R, InsertPt);
168 return R;
169 }
170
171 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
172 /// its underlying Instruction.
174 Instruction *Inst = nullptr,
175 const VPIRFlags &Flags = {},
176 const VPIRMetadata &MD = {},
178 const Twine &Name = "") {
179 VPInstruction *NewVPInst = tryInsertInstruction(
180 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name));
181 NewVPInst->setUnderlyingValue(Inst);
182 return NewVPInst;
183 }
185 DebugLoc DL, const Twine &Name = "") {
186 return createInstruction(Opcode, Operands, {}, DL, Name);
187 }
189 const VPIRFlags &Flags,
191 const Twine &Name = "") {
192 return tryInsertInstruction(
193 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
194 }
195
197 Type *ResultTy, const VPIRFlags &Flags = {},
199 const Twine &Name = "") {
200 return tryInsertInstruction(new VPInstructionWithType(
201 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
202 }
203
206 const Twine &Name = "") {
207 return tryInsertInstruction(new VPInstruction(
208 VPInstruction::FirstActiveLane, Masks, {}, {}, DL, Name));
209 }
210
213 const Twine &Name = "") {
214 return tryInsertInstruction(new VPInstruction(VPInstruction::LastActiveLane,
215 Masks, {}, {}, DL, Name));
216 }
217
219 unsigned Opcode, ArrayRef<VPValue *> Operands,
220 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
221 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
222 return tryInsertInstruction(
223 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
224 }
225
228 const Twine &Name = "") {
229 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
230 }
231
234 const Twine &Name = "") {
235 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
236 Name);
237 }
238
241 const Twine &Name = "") {
242
243 return tryInsertInstruction(new VPInstruction(
244 Instruction::BinaryOps::Or, {LHS, RHS},
245 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
246 }
247
250 const Twine &Name = "",
251 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
252 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
253 Name);
254 }
255
256 VPInstruction *
258 const Twine &Name = "",
259 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
260 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
261 Name);
262 }
263
269
275
277 VPValue *FalseVal,
279 const Twine &Name = "",
280 const VPIRFlags &Flags = {}) {
281 return tryInsertInstruction(new VPInstruction(
282 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
283 }
284
285 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
286 /// and \p B.
289 const Twine &Name = "") {
291 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
292 return tryInsertInstruction(
293 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
294 }
295
296 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
297 /// and \p B.
300 const Twine &Name = "") {
302 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
303 return tryInsertInstruction(
304 new VPInstruction(Instruction::FCmp, {A, B},
305 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
306 }
307
308 /// Create an AnyOf reduction pattern: or-reduce \p ChainOp, freeze the
309 /// result, then select between \p TrueVal and \p FalseVal.
311 VPValue *FalseVal,
313
316 const Twine &Name = "") {
317 return tryInsertInstruction(
319 GEPNoWrapFlags::none(), {}, DL, Name));
320 }
321
323 GEPNoWrapFlags GEPFlags,
325 const Twine &Name = "") {
326 return tryInsertInstruction(new VPInstruction(
327 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
328 }
329
332 const Twine &Name = "") {
333 return tryInsertInstruction(
335 GEPNoWrapFlags::none(), {}, DL, Name));
336 }
337
340 const Twine &Name = "", const VPIRFlags &Flags = {}) {
341 return tryInsertInstruction(new VPPhi(IncomingValues, Flags, DL, Name));
342 }
343
346 const Twine &Name = "") {
347 return tryInsertInstruction(new VPWidenPHIRecipe(IncomingValues, DL, Name));
348 }
349
351 VPlan &Plan = *getInsertBlock()->getPlan();
352 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
353 if (EC.isScalable()) {
354 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
355 RuntimeEC = EC.getKnownMinValue() == 1
356 ? VScale
357 : createOverflowingOp(Instruction::Mul,
358 {VScale, RuntimeEC}, {true, false});
359 }
360 return RuntimeEC;
361 }
362
363 /// Convert the input value \p Current to the corresponding value of an
364 /// induction with \p Start and \p Step values, using \p Start + \p Current *
365 /// \p Step.
367 FPMathOperator *FPBinOp, VPIRValue *Start,
368 VPValue *Current, VPValue *Step) {
369 return tryInsertInstruction(
370 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step));
371 }
372
374 DebugLoc DL,
375 const VPIRMetadata &Metadata = {}) {
376 return tryInsertInstruction(new VPInstructionWithType(
377 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
378 }
379
381 Type *ResultTy, DebugLoc DL,
382 const VPIRMetadata &Metadata = {}) {
383 return tryInsertInstruction(new VPInstructionWithType(
384 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
385 DL));
386 }
387
389 Type *ResultTy, DebugLoc DL,
390 const VPIRFlags &Flags,
391 const VPIRMetadata &Metadata = {}) {
392 return tryInsertInstruction(
393 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
394 }
395
397 DebugLoc DL) {
398 if (ResultTy == SrcTy)
399 return Op;
400 Instruction::CastOps CastOp =
401 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
402 ? Instruction::Trunc
403 : Instruction::ZExt;
404 return createScalarCast(CastOp, Op, ResultTy, DL);
405 }
406
408 DebugLoc DL) {
409 if (ResultTy == SrcTy)
410 return Op;
411 Instruction::CastOps CastOp =
412 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
413 ? Instruction::Trunc
414 : Instruction::SExt;
415 return createScalarCast(CastOp, Op, ResultTy, DL);
416 }
417
419 Type *ResultTy) {
420 return tryInsertInstruction(new VPWidenCastRecipe(
421 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
422 }
423
426 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
427 VPValue *VF, DebugLoc DL) {
428 return tryInsertInstruction(new VPScalarIVStepsRecipe(
429 IV, Step, VF, InductionOpcode,
430 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
431 }
432
434 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
435 }
436
437 //===--------------------------------------------------------------------===//
438 // RAII helpers.
439 //===--------------------------------------------------------------------===//
440
441 /// RAII object that stores the current insertion point and restores it when
442 /// the object is destroyed.
444 VPBuilder &Builder;
445 VPBasicBlock *Block;
447
448 public:
450 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
451
454
/// Restore the builder to the block and point that were read from it in the
/// constructor's initializer list.
455 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
456 };
457};
458
459/// TODO: The following VectorizationFactor was pulled out of
460/// LoopVectorizationCostModel class. LV also deals with
461/// VectorizerParams::VectorizationFactor.
462/// We need to streamline them.
463
464/// Information about vectorization costs.
466 /// Vector width with best cost.
468
469 /// Cost of the loop with that width.
471
472 /// Cost of the scalar loop.
474
475 /// The minimum trip count required to make vectorization profitable, e.g. due
476 /// to runtime checks.
478
482
483 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
485 return {ElementCount::getFixed(1), 0, 0};
486 }
487
/// Two factors compare equal when both their Width and their Cost match;
/// no other state takes part in the comparison.
488 bool operator==(const VectorizationFactor &rhs) const {
489 return Width == rhs.Width && Cost == rhs.Cost;
490 }
491
/// Exact negation of operator==.
492 bool operator!=(const VectorizationFactor &rhs) const {
493 return !(*this == rhs);
494 }
495};
496
497/// A class that represents two vectorization factors (initialized with 0 by
498/// default). One for fixed-width vectorization and one for scalable
499/// vectorization. This can be used by the vectorizer to choose from a range of
500/// fixed and/or scalable VFs in order to find the most cost-effective VF to
501/// vectorize with.
505
507 : FixedVF(ElementCount::getFixed(0)),
508 ScalableVF(ElementCount::getScalable(0)) {}
510 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
511 }
515 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
516 "Invalid scalable properties");
517 }
518
520
521 /// \return true if either fixed- or scalable VF is non-zero.
522 explicit operator bool() const { return FixedVF || ScalableVF; }
523
524 /// \return true if either fixed- or scalable VF is a valid vector VF.
525 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
526};
527
528/// Holds state needed to make cost decisions before computing costs per-VF,
529/// including the maximum VFs.
531 /// \return True if maximizing vector bandwidth is enabled by the target or
532 /// user options, for the given register kind (scalable or fixed-width).
533 bool useMaxBandwidth(bool IsScalable) const;
534
535 /// \return the maximized element count based on the target's vector
536 /// registers and the loop trip-count, but limited to a maximum safe VF.
537 /// This is a helper function of computeFeasibleMaxVF.
538 ElementCount getMaximizedVFForTarget(unsigned MaxTripCount,
539 unsigned SmallestType,
540 unsigned WidestType,
541 ElementCount MaxSafeVF, unsigned UserIC,
542 bool FoldTailByMasking,
543 bool RequiresScalarEpilogue);
544
545 /// If \p VF * \p UserIC > MaxTripCount, clamps VF to the next lower VF
546 /// that results in VF * UserIC <= MaxTripCount.
547 ElementCount clampVFByMaxTripCount(ElementCount VF, unsigned MaxTripCount,
548 unsigned UserIC, bool FoldTailByMasking,
549 bool RequiresScalarEpilogue) const;
550
551 /// Checks if scalable vectorization is supported and enabled. Caches the
552 /// result to avoid repeated debug dumps for repeated queries.
553 bool isScalableVectorizationAllowed();
554
555 /// \return the maximum legal scalable VF, based on the safe max number
556 /// of elements.
557 ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
558
559 /// Initializes the value of vscale used for tuning the cost model. If
560 /// vscale_range.min == vscale_range.max then return vscale_range.max, else
561 /// return the value returned by the corresponding TTI method.
562 void initializeVScaleForTuning();
563
564 const TargetTransformInfo &TTI;
565 const LoopVectorizationLegality *Legal;
566 const Loop *TheLoop;
567 const Function &F;
569 DemandedBits *DB;
571 const LoopVectorizeHints *Hints;
572
573 /// Cached result of isScalableVectorizationAllowed.
574 std::optional<bool> IsScalableVectorizationAllowed;
575
576 /// Used to store the value of vscale used for tuning the cost model. It is
577 /// initialized during object construction.
578 std::optional<unsigned> VScaleForTuning;
579
580 /// The highest VF possible for this loop, without using MaxBandwidth.
581 FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
582
583 /// All element types found in the loop.
584 SmallPtrSet<Type *, 16> ElementTypesInLoop;
585
586 /// PHINodes of the reductions that should be expanded in-loop. Set by
587 /// collectInLoopReductions.
588 SmallPtrSet<PHINode *, 4> InLoopReductions;
589
590 /// A Map of inloop reduction operations and their immediate chain operand.
591 /// FIXME: This can be removed once reductions can be costed correctly in
592 /// VPlan. This was added to allow quick lookup of the inloop operations.
593 /// Set by collectInLoopReductions.
594 DenseMap<Instruction *, Instruction *> InLoopReductionImmediateChains;
595
596 /// Maximum safe number of elements to be processed per vector iteration,
597 /// which do not prevent store-load forwarding and are safe with regard to the
598 /// memory dependencies. Required for EVL-based vectorization, where this
599 /// value is used as the upper bound of the safe AVL. Set by
600 /// computeFeasibleMaxVF.
601 std::optional<unsigned> MaxSafeElements;
602
603 /// Map of scalar integer values to the smallest bitwidth they can be legally
604 /// represented as. The vector equivalents of these values should be truncated
605 /// to this type.
607
608public:
609 /// The kind of cost that we are calculating.
611
612 /// Whether this loop should be optimized for size based on function attribute
613 /// or profile information.
614 const bool OptForSize;
615
617 const LoopVectorizationLegality *Legal,
618 const Loop *TheLoop, const Function &F,
621 const LoopVectorizeHints *Hints, bool OptForSize)
622 : TTI(TTI), Legal(Legal), TheLoop(TheLoop), F(F), PSE(PSE), DB(DB),
623 ORE(ORE), Hints(Hints),
624 CostKind(F.hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput),
626 initializeVScaleForTuning();
627 }
628
629 /// \return The vscale value used for tuning the cost model.
630 std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
631
632 /// \return True if register pressure should be considered for the given VF.
634
635 /// \return True if scalable vectors are supported by the target or forced.
636 bool supportsScalableVectors() const;
637
638 /// Collect element types in the loop that need widening.
640 const SmallPtrSetImpl<const Value *> *ValuesToIgnore = nullptr);
641
642 /// \return The size (in bits) of the smallest and widest types in the code
643 /// that need to be vectorized. We ignore values that remain scalar such as
644 /// 64 bit loop indices.
645 std::pair<unsigned, unsigned> getSmallestAndWidestTypes() const;
646
647 /// \return An upper bound for the vectorization factors for both
648 /// fixed and scalable vectorization, where the minimum-known number of
649 /// elements is a power-of-2 larger than zero. If scalable vectorization is
650 /// disabled or unsupported, then the scalable part will be equal to
651 /// ElementCount::getScalable(0). Also sets MaxSafeElements.
652 FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount,
653 ElementCount UserVF, unsigned UserIC,
654 bool FoldTailByMasking,
655 bool RequiresScalarEpilogue);
656
657 /// Return maximum safe number of elements to be processed per vector
658 /// iteration, which do not prevent store-load forwarding and are safe with
659 /// regard to the memory dependencies. Required for EVL-based VPlans to
660 /// correctly calculate AVL (application vector length) as min(remaining AVL,
661 /// MaxSafeElements). Set by computeFeasibleMaxVF.
662 /// TODO: need to consider adjusting cost model to use this value as a
663 /// vectorization factor for EVL-based vectorization.
664 std::optional<unsigned> getMaxSafeElements() const { return MaxSafeElements; }
665
666 /// Returns true if we should use strict in-order reductions for the given
667 /// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
668 /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
669 /// of FP operations.
670 bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const;
671
672 /// Returns true if the target machine supports masked store operation
673 /// for the given \p DataType and kind of access to \p Ptr.
674 bool isLegalMaskedStore(Type *DataType, Value *Ptr, Align Alignment,
675 unsigned AddressSpace) const;
676
677 /// Returns true if the target machine supports masked load operation
678 /// for the given \p DataType and kind of access to \p Ptr.
679 bool isLegalMaskedLoad(Type *DataType, Value *Ptr, Align Alignment,
680 unsigned AddressSpace) const;
681
682 /// Returns true if the target machine can represent \p V as a masked gather
683 /// or scatter operation.
684 bool isLegalGatherOrScatter(Value *V, ElementCount VF) const;
685
686 /// Split reductions into those that happen in the loop, and those that
687 /// happen outside. In-loop reductions are collected into InLoopReductions.
688 /// InLoopReductionImmediateChains is filled with each in-loop reduction
689 /// operation and its immediate chain operand for use during cost modelling.
691
692 /// Returns true if the Phi is part of an inloop reduction.
693 bool isInLoopReduction(PHINode *Phi) const {
694 return InLoopReductions.contains(Phi);
695 }
696
697 /// Returns the set of in-loop reduction PHIs.
699 return InLoopReductions;
700 }
701
702 /// Returns the immediate chain operand of in-loop reduction operation \p I,
703 /// or nullptr if \p I is not an in-loop reduction operation.
705 return InLoopReductionImmediateChains.lookup(I);
706 }
707
708 /// Check whether vectorization would require runtime checks. When optimizing
709 /// for size, returning true here aborts vectorization.
711
712 /// Compute smallest bitwidth each instruction can be represented with.
713 /// The vector equivalents of these instructions should be truncated to this
714 /// type.
716
717 /// \returns The smallest bitwidth each instruction can be represented with.
719 return MinBWs;
720 }
721};
722
723/// Planner drives the vectorization process after having passed
724/// Legality checks.
726 /// The loop that we evaluate.
727 Loop *OrigLoop;
728
729 /// Loop Info analysis.
730 LoopInfo *LI;
731
732 /// The dominator tree.
733 DominatorTree *DT;
734
735 /// Target Library Info.
736 const TargetLibraryInfo *TLI;
737
738 /// Target Transform Info.
739 const TargetTransformInfo &TTI;
740
741 /// The legality analysis.
743
744 /// The profitability analysis.
746
747 /// VF selection state independent of cost-modeling decisions.
748 VFSelectionContext &Config;
749
750 /// The interleaved access analysis.
752
754
755 const LoopVectorizeHints &Hints;
756
758
760
761 /// Profitable vector factors.
763
764 /// A builder used to construct the current plan.
765 VPBuilder Builder;
766
767 /// Computes the cost of \p Plan for vectorization factor \p VF.
768 ///
769 /// The current implementation requires access to the
770 /// LoopVectorizationLegality to handle inductions and reductions, which is
771 /// why it is kept separate from the VPlan-only cost infrastructure.
772 ///
773 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
774 /// been retired.
775 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
776
777 /// Precompute costs for certain instructions using the legacy cost model. The
778 /// function is used to bring up the VPlan-based cost model to initially avoid
779 /// taking different decisions due to inaccuracies in the legacy cost model.
780 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
781 VPCostContext &CostCtx) const;
782
783public:
785 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
790 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
791 Config(Config), IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
792
793 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
794 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
795 /// interleaving should be avoided up-front, no plans are generated.
796 void plan(ElementCount UserVF, unsigned UserIC);
797
798 /// Use the VPlan-native path to plan how to best vectorize, return the best
799 /// VF and its cost.
801
802 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
803 /// for each VF.
804 VPlan &getPlanFor(ElementCount VF) const;
805
806 /// Compute and return the most profitable vectorization factor and the
807 /// corresponding best VPlan. Also collect all profitable VFs in
808 /// ProfitableVFs.
809 std::pair<VectorizationFactor, VPlan *> computeBestVF();
810
811 /// \return The desired interleave count.
812 /// If interleave count has been specified by metadata it will be returned.
813 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
814 /// are the selected vectorization factor and the cost of the selected VF.
815 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
816 InstructionCost LoopCost);
817
818 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
819 /// according to the best selected \p VF and \p UF.
820 ///
821 /// TODO: \p EpilogueVecKind should be removed once the re-use issue has been
822 /// fixed.
823 ///
824 /// Returns a mapping of SCEVs to their expanded IR values.
825 /// Note that this is a temporary workaround needed due to the current
826 /// epilogue handling.
828 None, ///< Not part of epilogue vectorization.
829 MainLoop, ///< Vectorizing the main loop of epilogue vectorization.
830 Epilogue ///< Vectorizing the epilogue loop.
831 };
833 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
835 EpilogueVectorizationKind EpilogueVecKind =
837
838#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
839 void printPlans(raw_ostream &O);
840#endif
841
842 /// Look through the existing plans and return true if we have one with
843 /// vectorization factor \p VF.
845 return any_of(VPlans,
846 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
847 }
848
849 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
850 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
851 /// returned value holds for the entire \p Range.
852 static bool
853 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
854 VFRange &Range);
855
856 /// \return A VPlan for the most profitable epilogue vectorization, with its
857 /// VF narrowed to the chosen factor. The returned plan is a duplicate.
858 /// Returns nullptr if epilogue vectorization is not supported or not
859 /// profitable for the loop.
860 std::unique_ptr<VPlan>
861 selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC);
862
863 /// Emit remarks for recipes with invalid costs in the available VPlans.
865
866 /// Create a check to \p Plan to see if the vector loop should be executed
867 /// based on its trip count.
868 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
869 ElementCount MinProfitableTripCount) const;
870
871 /// Attach the runtime checks of \p RTChecks to \p Plan.
872 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
873 bool HasBranchWeights) const;
874
875 /// Update loop metadata and profile info for both the scalar remainder loop
876 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
877 /// loop on the vector loop and replaces vectorizer-specific metadata. The
878 /// loop ID of the original loop \p OrigLoopID must be passed, together with
879 /// the average trip count and invocation weight of the original loop (\p
880 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
881 /// cannot be retrieved after the plan has been executed, as the original loop
882 /// may have been removed.
884 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
885 bool VectorizingEpilogue, MDNode *OrigLoopID,
886 std::optional<unsigned> OrigAverageTripCount,
887 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
888 bool DisableRuntimeUnroll);
889
890protected:
891 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
892 /// according to the information gathered by Legal when it checked if it is
893 /// legal to vectorize the loop.
894 void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
895
896private:
897 /// Build a VPlan according to the information gathered by Legal. \return a
898 /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
899 /// exclusive, possibly decreasing \p Range.End. If no VPlan can be built for
900 /// the input range, set the largest included VF to the maximum VF for which
901 /// no plan could be built.
902 VPlanPtr tryToBuildVPlan(VFRange &Range);
903
904 /// Build a VPlan using VPRecipes according to the information gathered by
905 /// Legal. This method is only used for the legacy inner loop vectorizer.
906 /// \p Range's largest included VF is restricted to the maximum VF the
907 /// returned VPlan is valid for. If no VPlan can be built for the input range,
908 /// set the largest included VF to the maximum VF for which no plan could be
909 /// built. Each VPlan is built starting from a copy of \p InitialPlan, which
910 /// is a plain CFG VPlan wrapping the original scalar loop.
911 VPlanPtr tryToBuildVPlanWithVPRecipes(VPlanPtr InitialPlan, VFRange &Range);
912
913 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
914 /// according to the information gathered by Legal when it checked if it is
915 /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
916 void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
917
918 /// Add ComputeReductionResult recipes to the middle block to compute the
919 /// final reduction results. Add Select recipes to the latch block when
920 /// folding tail, to feed ComputeReductionResult with the last or penultimate
921 /// iteration values according to the header mask.
922 void addReductionResultComputation(VPlanPtr &Plan,
923 VPRecipeBuilder &RecipeBuilder,
924 ElementCount MinVF);
925
926 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
927 /// that of B.
928 bool isMoreProfitable(const VectorizationFactor &A,
929 const VectorizationFactor &B, bool HasTail,
930 bool IsEpilogue = false) const;
931
932 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
933 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
934 bool isMoreProfitable(const VectorizationFactor &A,
935 const VectorizationFactor &B,
936 const unsigned MaxTripCount, bool HasTail,
937 bool IsEpilogue = false) const;
938
939 /// Determines if we have the infrastructure to vectorize the loop and its
940 /// epilogue, assuming the main loop is vectorized by \p MainPlan.
941 bool isCandidateForEpilogueVectorization(VPlan &MainPlan) const;
942};
943
944} // namespace llvm
945
946#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:333
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, EpilogueVectorizationKind EpilogueVecKind=EpilogueVectorizationKind::None)
EpilogueVectorizationKind
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
@ MainLoop
Vectorizing the main loop of epilogue vectorization.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1691
VectorizationFactor planInVPlanNativePath(ElementCount UserVF)
Use the VPlan-native path to plan how to best vectorize, return the best VF and its cost.
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1742
void buildVPlans(ElementCount MinVF, ElementCount MaxVF)
Build VPlans for power-of-2 VFs between MinVF and MaxVF inclusive, according to the information gath...
Definition VPlan.cpp:1675
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks, bool HasBranchWeights) const
Attach the runtime checks of RTChecks to Plan.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, VFSelectionContext &Config, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VFs.
Definition VPlan.cpp:1656
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1848
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
std::unique_ptr< VPlan > selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC)
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
std::pair< VectorizationFactor, VPlan * > computeBestVF()
Compute and return the most profitable vectorization factor and the corresponding best VPlan.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
Holds state needed to make cost decisions before computing costs per-VF, including the maximum VFs.
const bool OptForSize
Whether this loop should be optimized for size based on function attribute or profile information.
bool isInLoopReduction(PHINode *Phi) const
Returns true if the Phi is part of an inloop reduction.
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
const TTI::TargetCostKind CostKind
The kind of cost that we are calculating.
bool isLegalMaskedStore(Type *DataType, Value *Ptr, Align Alignment, unsigned AddressSpace) const
Returns true if the target machine supports masked store operation for the given DataType and kind of...
bool isLegalMaskedLoad(Type *DataType, Value *Ptr, Align Alignment, unsigned AddressSpace) const
Returns true if the target machine supports masked load operation for the given DataType and kind of ...
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
const SmallPtrSetImpl< PHINode * > & getInLoopReductions() const
Returns the set of in-loop reduction PHIs.
std::optional< unsigned > getMaxSafeElements() const
Return maximum safe number of elements to be processed per vector iteration, which do not prevent sto...
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
const MapVector< Instruction *, uint64_t > & getMinimalBitwidths() const
VFSelectionContext(const TargetTransformInfo &TTI, const LoopVectorizationLegality *Legal, const Loop *TheLoop, const Function &F, PredicatedScalarEvolution &PSE, DemandedBits *DB, OptimizationRemarkEmitter *ORE, const LoopVectorizeHints *Hints, bool OptForSize)
Instruction * getInLoopReductionImmediateChain(Instruction *I) const
Returns the immediate chain operand of in-loop reduction operation I, or nullptr if I is not an in-lo...
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
std::optional< unsigned > getVScaleForTuning() const
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4149
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4176
iterator end()
Definition VPlan.h:4186
VPlan * getPlan()
Definition VPlan.cpp:178
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
T * insert(T *R)
Insert R at the current insertion point. Returns R unchanged.
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1641
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLastActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPBuilder(VPRecipeBase *InsertPt)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3898
Recipe to expand a SCEV expression.
Definition VPlan.h:3746
Class to record and manage LLVM IR flags.
Definition VPlan.h:687
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1167
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1511
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1222
@ VScale
Returns the value for vscale.
Definition VPlan.h:1331
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:405
VPBasicBlock * getParent()
Definition VPlan.h:479
Helper class to create VPRecipes from IR instructions.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:3969
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1832
A recipe for widened phis.
Definition VPlan.h:2579
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4507
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4805
LLVM Value Representation.
Definition Value.h:75
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This is an optimization pass for GlobalISel generic memory operations.
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
@ Offset
Definition DWP.cpp:557
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
cl::opt< unsigned > ForceTargetInstructionCost
TargetTransformInfo TTI
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
A struct that represents some properties of the register usage of a loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)