LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions how to vectorize the given loop. In particular, represent the
16/// control-flow of the vectorized version, the replication of instructions that
17/// are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
31
32namespace {
33class GeneratedRTChecks;
34}
35
36namespace llvm {
37
38class LoopInfo;
39class DominatorTree;
45class LoopVersioning;
48class VPRecipeBuilder;
49struct VPRegisterUsage;
50struct VFRange;
51
55
56/// \return An upper bound for vscale based on TTI or the vscale_range
57/// attribute.
58std::optional<unsigned> getMaxVScale(const Function &F,
60
61// Utility functions that are used by different vectorization classes
63
64/// Reports a vectorization failure: print \p DebugMsg for debugging
65/// purposes along with the corresponding optimization remark \p RemarkName.
66/// If \p I is passed, it is an instruction that prevents vectorization.
67/// Otherwise, the loop \p TheLoop is used for the location of the remark.
68void reportVectorizationFailure(const StringRef DebugMsg,
69 const StringRef OREMsg, const StringRef ORETag,
71 const Loop *TheLoop, Instruction *I = nullptr);
72
73/// Same as above, but the debug message and optimization remark are identical
74inline void reportVectorizationFailure(const StringRef DebugMsg,
75 const StringRef ORETag,
77 const Loop *TheLoop,
78 Instruction *I = nullptr) {
79 reportVectorizationFailure(DebugMsg, DebugMsg, ORETag, ORE, TheLoop, I);
80}
81
82/// Reports an informative message: print \p Msg for debugging purposes as well
83/// as an optimization remark. Uses either \p I as location of the remark, or
84/// otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
85/// remark.
86void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
88 const Loop *TheLoop, Instruction *I = nullptr,
89 DebugLoc DL = {});
90
91/// Report successful vectorization of the loop. In case an outer loop is
92/// vectorized, prepend "outer" to the vectorization remark.
93void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
94 ElementCount VFWidth, unsigned IC);
95
96} // namespace LoopVectorizationUtils
97
98/// VPlan-based builder utility analogous to IRBuilder.
99class VPBuilder {
100 VPBasicBlock *BB = nullptr;
102
103 /// Insert \p VPI in BB at InsertPt if BB is set.
104 template <typename T> T *tryInsertInstruction(T *R) {
105 if (BB)
106 BB->insert(R, InsertPt);
107 return R;
108 }
109
110 VPInstruction *createInstruction(unsigned Opcode,
111 ArrayRef<VPValue *> Operands,
112 const VPIRMetadata &MD, DebugLoc DL,
113 const Twine &Name = "") {
114 return tryInsertInstruction(
115 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
116 }
117
118public:
119 VPlan &getPlan() const {
120 assert(getInsertBlock() && "Insert block must be set");
121 return *getInsertBlock()->getPlan();
122 }
123
124 VPBuilder() = default;
125 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
126 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
128 setInsertPoint(TheBB, IP);
129 }
130
131 /// Clear the insertion point: created instructions will not be inserted into
132 /// a block.
134 BB = nullptr;
135 InsertPt = VPBasicBlock::iterator();
136 }
137
138 VPBasicBlock *getInsertBlock() const { return BB; }
139 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
140
141 /// Create a VPBuilder to insert after \p R.
143 VPBuilder B;
144 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
145 return B;
146 }
147
148 /// InsertPoint - A saved insertion point.
150 VPBasicBlock *Block = nullptr;
152
153 public:
154 /// Creates a new insertion point which doesn't point to anything.
155 VPInsertPoint() = default;
156
157 /// Creates a new insertion point at the given location.
159 : Block(InsertBlock), Point(InsertPoint) {}
160
161 /// Returns true if this insert point is set.
162 bool isSet() const { return Block != nullptr; }
163
164 VPBasicBlock *getBlock() const { return Block; }
165 VPBasicBlock::iterator getPoint() const { return Point; }
166 };
167
168 /// Sets the current insert point to a previously-saved location.
170 if (IP.isSet())
171 setInsertPoint(IP.getBlock(), IP.getPoint());
172 else
174 }
175
176 /// This specifies that created VPInstructions should be appended to the end
177 /// of the specified block.
179 assert(TheBB && "Attempting to set a null insert point");
180 BB = TheBB;
181 InsertPt = BB->end();
182 }
183
184 /// This specifies that created instructions should be inserted at the
185 /// specified point.
187 BB = TheBB;
188 InsertPt = IP;
189 }
190
191 /// This specifies that created instructions should be inserted at the
192 /// specified point.
194 BB = IP->getParent();
195 InsertPt = IP->getIterator();
196 }
197
198 /// Insert \p R at the current insertion point. Returns \p R unchanged.
199 template <typename T> [[maybe_unused]] T *insert(T *R) {
200 BB->insert(R, InsertPt);
201 return R;
202 }
203
204 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
205 /// its underlying Instruction.
207 Instruction *Inst = nullptr,
208 const VPIRFlags &Flags = {},
209 const VPIRMetadata &MD = {},
211 const Twine &Name = "",
212 Type *ResultTy = nullptr) {
213 VPInstruction *NewVPInst = tryInsertInstruction(
214 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name, ResultTy));
215 NewVPInst->setUnderlyingValue(Inst);
216 return NewVPInst;
217 }
219 DebugLoc DL, const Twine &Name = "") {
220 return createInstruction(Opcode, Operands, {}, DL, Name);
221 }
223 const VPIRFlags &Flags,
225 const Twine &Name = "") {
226 return tryInsertInstruction(
227 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
228 }
229
231 Type *ResultTy, const VPIRFlags &Flags = {},
233 const Twine &Name = "") {
234 return tryInsertInstruction(new VPInstructionWithType(
235 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
236 }
237
240 const Twine &Name = "") {
241 // Assume that the maximum possible number of elements in a vector fits
242 // within the index type for the default address space.
243 VPlan &Plan = getPlan();
244 Type *IndexTy = Plan.getDataLayout().getIndexType(Plan.getContext(), 0);
245 return tryInsertInstruction(new VPInstruction(
246 VPInstruction::FirstActiveLane, Masks, {}, {}, DL, Name, IndexTy));
247 }
248
251 const Twine &Name = "") {
252 // Assume that the maximum possible number of elements in a vector fits
253 // within the index type for the default address space.
254 VPlan &Plan = getPlan();
255 Type *IndexTy = Plan.getDataLayout().getIndexType(Plan.getContext(), 0);
256 return tryInsertInstruction(new VPInstruction(
257 VPInstruction::LastActiveLane, Masks, {}, {}, DL, Name, IndexTy));
258 }
259
261 unsigned Opcode, ArrayRef<VPValue *> Operands,
262 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
263 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
264 return tryInsertInstruction(
265 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
266 }
267
270 const Twine &Name = "") {
271 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
272 }
273
276 const Twine &Name = "") {
277 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
278 Name);
279 }
280
283 const Twine &Name = "") {
284
285 return tryInsertInstruction(new VPInstruction(
286 Instruction::BinaryOps::Or, {LHS, RHS},
287 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
288 }
289
292 const Twine &Name = "",
293 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
294 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
295 Name);
296 }
297
298 VPInstruction *
300 const Twine &Name = "",
301 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
302 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
303 Name);
304 }
305
311
317
319 VPValue *FalseVal,
321 const Twine &Name = "",
322 const VPIRFlags &Flags = {}) {
323 return tryInsertInstruction(new VPInstruction(
324 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
325 }
326
327 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
328 /// and \p B.
331 const Twine &Name = "") {
333 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
334 return tryInsertInstruction(
335 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
336 }
337
338 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
339 /// and \p B.
342 const Twine &Name = "") {
344 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
345 return tryInsertInstruction(
346 new VPInstruction(Instruction::FCmp, {A, B},
347 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
348 }
349
350 /// Create an AnyOf reduction pattern: or-reduce \p ChainOp, freeze the
351 /// result, then select between \p TrueVal and \p FalseVal.
353 VPValue *FalseVal,
355
358 const Twine &Name = "") {
359 return tryInsertInstruction(
361 GEPNoWrapFlags::none(), {}, DL, Name));
362 }
363
365 GEPNoWrapFlags GEPFlags,
367 const Twine &Name = "") {
368 return tryInsertInstruction(new VPInstruction(
369 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
370 }
371
374 const Twine &Name = "") {
375 return tryInsertInstruction(
377 GEPNoWrapFlags::none(), {}, DL, Name));
378 }
379
382 const Twine &Name = "", const VPIRFlags &Flags = {},
383 Type *ResultTy = nullptr) {
384 return tryInsertInstruction(
385 new VPPhi(IncomingValues, Flags, DL, Name, ResultTy));
386 }
387
390 const Twine &Name = "") {
391 return tryInsertInstruction(new VPWidenPHIRecipe(IncomingValues, DL, Name));
392 }
393
395 VPlan &Plan = *getInsertBlock()->getPlan();
396 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
397 if (EC.isScalable()) {
398 VPValue *VScale = createNaryOp(VPInstruction::VScale, {}, Ty);
399 RuntimeEC = EC.getKnownMinValue() == 1
400 ? VScale
401 : createOverflowingOp(Instruction::Mul,
402 {VScale, RuntimeEC}, {true, false});
403 }
404 return RuntimeEC;
405 }
406
407 /// Convert the input value \p Current to the corresponding value of an
408 /// induction with \p Start and \p Step values, using \p Start + \p Current *
409 /// \p Step.
411 FPMathOperator *FPBinOp, VPIRValue *Start,
412 VPValue *Current, VPValue *Step) {
413 return tryInsertInstruction(
414 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step));
415 }
416
418 DebugLoc DL,
419 const VPIRMetadata &Metadata = {}) {
420 return tryInsertInstruction(new VPInstructionWithType(
421 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
422 }
423
425 Type *ResultTy, DebugLoc DL,
426 const VPIRMetadata &Metadata = {}) {
427 return tryInsertInstruction(new VPInstructionWithType(
428 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
429 DL));
430 }
431
433 Type *ResultTy, DebugLoc DL,
434 const VPIRFlags &Flags,
435 const VPIRMetadata &Metadata = {}) {
436 return tryInsertInstruction(
437 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
438 }
439
441 DebugLoc DL) {
442 if (ResultTy == SrcTy)
443 return Op;
444 Instruction::CastOps CastOp =
445 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
446 ? Instruction::Trunc
447 : Instruction::ZExt;
448 return createScalarCast(CastOp, Op, ResultTy, DL);
449 }
450
452 DebugLoc DL) {
453 if (ResultTy == SrcTy)
454 return Op;
455 Instruction::CastOps CastOp =
456 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
457 ? Instruction::Trunc
458 : Instruction::SExt;
459 return createScalarCast(CastOp, Op, ResultTy, DL);
460 }
461
463 return tryInsertInstruction(
464 new VPInstruction(Instruction::Freeze, Op, {}, {}, DL));
465 }
466
468 Type *ResultTy) {
469 return tryInsertInstruction(new VPWidenCastRecipe(
470 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
471 }
472
473 /// Create a single-scalar recipe with \p Opcode and \p Operands without
474 /// inserting it.
476 ArrayRef<VPValue *> Operands,
477 VPValue *Mask,
478 const VPIRFlags &Flags,
479 const VPIRMetadata &Metadata,
480 DebugLoc DL, Instruction *UV) {
481 if (Instruction::isCast(Opcode)) {
482 assert(!Mask && "Cast cannot be predicated");
483 return new VPInstructionWithType(Opcode, Operands, UV->getType(), Flags,
484 Metadata, DL, UV->getName(), UV);
485 }
486 return new VPReplicateRecipe(UV, Operands, /*IsSingleScalar=*/true, Mask,
487 Flags, Metadata, DL);
488 }
489
492 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
493 VPValue *VF, DebugLoc DL) {
494 return tryInsertInstruction(new VPScalarIVStepsRecipe(
495 IV, Step, VF, InductionOpcode,
496 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
497 }
498
500 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
501 }
502
504 createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
505 GEPNoWrapFlags GEPFlags, DebugLoc DL) {
506 return tryInsertInstruction(
507 new VPVectorPointerRecipe(Ptr, SourceElementTy, Stride, GEPFlags, DL));
508 }
509
511 Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments,
512 Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL) {
513 return tryInsertInstruction(new VPWidenMemIntrinsicRecipe(
514 VectorIntrinsicID, CallArguments, Ty, Alignment, MD, DL));
515 }
516
517 //===--------------------------------------------------------------------===//
518 // RAII helpers.
519 //===--------------------------------------------------------------------===//
520
521 /// RAII object that stores the current insertion point and restores it when
522 /// the object is destroyed.
524 VPBuilder &Builder;
525 VPBasicBlock *Block;
527
528 public:
530 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
531
534
535 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
536 };
537};
538
539/// TODO: The following VectorizationFactor was pulled out of
540/// LoopVectorizationCostModel class. LV also deals with
541/// VectorizerParams::VectorizationFactor.
542/// We need to streamline them.
543
544/// Information about vectorization costs.
546 /// Vector width with best cost.
548
549 /// Cost of the loop with that width.
551
552 /// Cost of the scalar loop.
554
555 /// The minimum trip count required to make vectorization profitable, e.g. due
556 /// to runtime checks.
558
562
563 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
565 return {ElementCount::getFixed(1), 0, 0};
566 }
567
568 bool operator==(const VectorizationFactor &rhs) const {
569 return Width == rhs.Width && Cost == rhs.Cost;
570 }
571
572 bool operator!=(const VectorizationFactor &rhs) const {
573 return !(*this == rhs);
574 }
575};
576
577/// A class that represents two vectorization factors (initialized with 0 by
578/// default). One for fixed-width vectorization and one for scalable
579/// vectorization. This can be used by the vectorizer to choose from a range of
580/// fixed and/or scalable VFs in order to find the most cost-effective VF to
581/// vectorize with.
585
587 : FixedVF(ElementCount::getFixed(0)),
588 ScalableVF(ElementCount::getScalable(0)) {}
590 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
591 }
595 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
596 "Invalid scalable properties");
597 }
598
600
601 /// \return true if either fixed- or scalable VF is non-zero.
602 explicit operator bool() const { return FixedVF || ScalableVF; }
603
604 /// \return true if either fixed- or scalable VF is a valid vector VF.
605 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
606};
607
608/// Holds state needed to make cost decisions before computing costs per-VF,
609/// including the maximum VFs.
611 /// \return True if maximizing vector bandwidth is enabled by the target or
612 /// user options, for the given register kind (scalable or fixed-width).
613 bool useMaxBandwidth(bool IsScalable) const;
614
615 /// \return the maximized element count based on the targets vector
616 /// registers and the loop trip-count, but limited to a maximum safe VF.
617 /// This is a helper function of computeFeasibleMaxVF.
618 ElementCount getMaximizedVFForTarget(unsigned MaxTripCount,
619 unsigned SmallestType,
620 unsigned WidestType,
621 ElementCount MaxSafeVF, unsigned UserIC,
622 bool FoldTailByMasking,
623 bool RequiresScalarEpilogue);
624
625 /// If \p VF * \p UserIC > MaxTripcount, clamps VF to the next lower VF
626 /// that results in VF * UserIC <= MaxTripCount.
627 ElementCount clampVFByMaxTripCount(ElementCount VF, unsigned MaxTripCount,
628 unsigned UserIC, bool FoldTailByMasking,
629 bool RequiresScalarEpilogue) const;
630
631 /// Checks if scalable vectorization is supported and enabled. Caches the
632 /// result to avoid repeated debug dumps for repeated queries.
633 bool isScalableVectorizationAllowed();
634
635 /// \return the maximum legal scalable VF, based on the safe max number
636 /// of elements.
637 ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
638
639 /// Initializes the value of vscale used for tuning the cost model. If
640 /// vscale_range.min == vscale_range.max then return vscale_range.max, else
641 /// return the value returned by the corresponding TTI method.
642 void initializeVScaleForTuning();
643
644 const TargetTransformInfo &TTI;
645 const LoopVectorizationLegality *Legal;
646 const Loop *TheLoop;
647 const Function &F;
649 DemandedBits *DB;
651 const LoopVectorizeHints *Hints;
652
653 /// Cached result of isScalableVectorizationAllowed.
654 std::optional<bool> IsScalableVectorizationAllowed;
655
656 /// Used to store the value of vscale used for tuning the cost model. It is
657 /// initialized during object construction.
658 std::optional<unsigned> VScaleForTuning;
659
660 /// The highest VF possible for this loop, without using MaxBandwidth.
661 FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
662
663 /// All element types found in the loop.
664 SmallPtrSet<Type *, 16> ElementTypesInLoop;
665
666 /// PHINodes of the reductions that should be expanded in-loop. Set by
667 /// collectInLoopReductions.
668 SmallPtrSet<PHINode *, 4> InLoopReductions;
669
670 /// A Map of inloop reduction operations and their immediate chain operand.
671 /// FIXME: This can be removed once reductions can be costed correctly in
672 /// VPlan. This was added to allow quick lookup of the inloop operations.
673 /// Set by collectInLoopReductions.
674 DenseMap<Instruction *, Instruction *> InLoopReductionImmediateChains;
675
676 /// Maximum safe number of elements to be processed per vector iteration,
677 /// which do not prevent store-load forwarding and are safe with regard to the
678 /// memory dependencies. Required for EVL-based vectorization, where this
679 /// value is used as the upper bound of the safe AVL. Set by
680 /// computeFeasibleMaxVF.
681 std::optional<unsigned> MaxSafeElements;
682
683 /// Map of scalar integer values to the smallest bitwidth they can be legally
684 /// represented as. The vector equivalents of these values should be truncated
685 /// to this type.
687
688public:
689 /// The kind of cost that we are calculating.
691
692 /// Whether this loop should be optimized for size based on function attribute
693 /// or profile information.
694 const bool OptForSize;
695
697 const LoopVectorizationLegality *Legal,
698 const Loop *TheLoop, const Function &F,
701 const LoopVectorizeHints *Hints, bool OptForSize)
702 : TTI(TTI), Legal(Legal), TheLoop(TheLoop), F(F), PSE(PSE), DB(DB),
703 ORE(ORE), Hints(Hints),
704 CostKind(F.hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput),
706 initializeVScaleForTuning();
707 }
708
709 /// \return The vscale value used for tuning the cost model.
710 std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
711
712 /// \return True if register pressure should be considered for the given VF.
714
715 /// \return True if scalable vectors are supported by the target or forced.
716 bool supportsScalableVectors() const;
717
718 /// Collect element types in the loop that need widening.
720 const SmallPtrSetImpl<const Value *> *ValuesToIgnore = nullptr);
721
722 /// \return The size (in bits) of the smallest and widest types in the code
723 /// that need to be vectorized. We ignore values that remain scalar such as
724 /// 64 bit loop indices.
725 std::pair<unsigned, unsigned> getSmallestAndWidestTypes() const;
726
727 /// \return An upper bound for the vectorization factors for both
728 /// fixed and scalable vectorization, where the minimum-known number of
729 /// elements is a power-of-2 larger than zero. If scalable vectorization is
730 /// disabled or unsupported, then the scalable part will be equal to
731 /// ElementCount::getScalable(0). Also sets MaxSafeElements.
732 FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount,
733 ElementCount UserVF, unsigned UserIC,
734 bool FoldTailByMasking,
735 bool RequiresScalarEpilogue);
736
737 /// Return maximum safe number of elements to be processed per vector
738 /// iteration, which do not prevent store-load forwarding and are safe with
739 /// regard to the memory dependencies. Required for EVL-based VPlans to
740 /// correctly calculate AVL (application vector length) as min(remaining AVL,
741 /// MaxSafeElements). Set by computeFeasibleMaxVF.
742 /// TODO: need to consider adjusting cost model to use this value as a
743 /// vectorization factor for EVL-based vectorization.
744 std::optional<unsigned> getMaxSafeElements() const { return MaxSafeElements; }
745
746 /// Returns true if we should use strict in-order reductions for the given
747 /// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
748 /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
749 /// of FP operations.
750 bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const;
751
752 /// Returns true if the target machine supports masked loads or stores
753 /// for \p I's data type and alignment. The caller must ensure the access is
754 /// consecutive or part of an interleave group.
756
757 /// Returns true if the target machine can represent \p V as a masked gather
758 /// or scatter operation.
759 bool isLegalGatherOrScatter(Value *V, ElementCount VF) const;
760
761 /// Split reductions into those that happen in the loop, and those that
762 /// happen outside. In-loop reductions are collected into InLoopReductions.
763 /// InLoopReductionImmediateChains is filled with each in-loop reduction
764 /// operation and its immediate chain operand for use during cost modelling.
766
767 /// Returns true if the Phi is part of an inloop reduction.
768 bool isInLoopReduction(PHINode *Phi) const {
769 return InLoopReductions.contains(Phi);
770 }
771
772 /// Returns the set of in-loop reduction PHIs.
774 return InLoopReductions;
775 }
776
777 /// Returns the immediate chain operand of in-loop reduction operation \p I,
778 /// or nullptr if \p I is not an in-loop reduction operation.
780 return InLoopReductionImmediateChains.lookup(I);
781 }
782
783 /// Check whether vectorization would require runtime checks. When optimizing
784 /// for size, returning true here aborts vectorization.
786
787 /// Returns a scalable VF to use for outer-loop vectorization if the target
788 /// supports it and a fixed VF otherwise.
790
791 /// Compute smallest bitwidth each instruction can be represented with.
792 /// The vector equivalents of these instructions should be truncated to this
793 /// type.
795
796 /// \returns The smallest bitwidth each instruction can be represented with.
798 return MinBWs;
799 }
800};
801
802/// Planner drives the vectorization process after having passed
803/// Legality checks.
805 /// The loop that we evaluate.
806 Loop *OrigLoop;
807
808 /// Loop Info analysis.
809 LoopInfo *LI;
810
811 /// The dominator tree.
812 DominatorTree *DT;
813
814 /// Target Library Info.
815 const TargetLibraryInfo *TLI;
816
817 /// Target Transform Info.
818 const TargetTransformInfo &TTI;
819
820 /// The legality analysis.
822
823 /// The profitability analysis.
825
826 /// VF selection state independent of cost-modeling decisions.
827 VFSelectionContext &Config;
828
829 /// The interleaved access analysis.
831
833
834 const LoopVectorizeHints &Hints;
835
837
839
840 /// Profitable vector factors.
842
843 /// A builder used to construct the current plan.
844 VPBuilder Builder;
845
846 /// Computes the cost of \p Plan for vectorization factor \p VF.
847 ///
848 /// The current implementation requires access to the
849 /// LoopVectorizationLegality to handle inductions and reductions, which is
850 /// why it is kept separate from the VPlan-only cost infrastructure.
851 ///
852 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
853 /// been retired.
854 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
855
856 /// Precompute costs for certain instructions using the legacy cost model. The
857 /// function is used to bring up the VPlan-based cost model to initially avoid
858 /// taking different decisions due to inaccuracies in the legacy cost model.
859 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
860 VPCostContext &CostCtx) const;
861
862public:
864 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
869 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
870 Config(Config), IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
871
872 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
873 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
874 /// interleaving should be avoided up-front, no plans are generated.
875 void plan(ElementCount UserVF, unsigned UserIC);
876
877 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
878 /// for each VF.
879 VPlan &getPlanFor(ElementCount VF) const;
880
881 /// Compute and return the most profitable vectorization factor and the
882 /// corresponding best VPlan. Also collect all profitable VFs in
883 /// ProfitableVFs.
884 std::pair<VectorizationFactor, VPlan *> computeBestVF();
885
886 /// \return The desired interleave count.
887 /// If interleave count has been specified by metadata it will be returned.
888 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
889 /// are the selected vectorization factor and the cost of the selected VF.
890 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
891 InstructionCost LoopCost);
892
893 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
894 /// according to the best selected \p VF and \p UF.
895 ///
896 /// TODO: \p EpilogueVecKind should be removed once the re-use issue has been
897 /// fixed.
898 ///
899 /// Returns a mapping of SCEVs to their expanded IR values.
900 /// Note that this is a temporary workaround needed due to the current
901 /// epilogue handling.
903 None, ///< Not part of epilogue vectorization.
904 MainLoop, ///< Vectorizing the main loop of epilogue vectorization.
905 Epilogue ///< Vectorizing the epilogue loop.
906 };
908 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
910 EpilogueVectorizationKind EpilogueVecKind =
912
913#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
914 void printPlans(raw_ostream &O);
915#endif
916
917 /// Look through the existing plans and return true if we have one with
918 /// vectorization factor \p VF.
920 return any_of(VPlans,
921 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
922 }
923
924 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
925 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
926 /// returned value holds for the entire \p Range.
927 static bool
928 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
929 VFRange &Range);
930
931 /// \return A VPlan for the most profitable epilogue vectorization, with its
932 /// VF narrowed to the chosen factor. The returned plan is a duplicate.
933 /// Returns nullptr if epilogue vectorization is not supported or not
934 /// profitable for the loop.
935 std::unique_ptr<VPlan>
936 selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC);
937
938 /// Emit remarks for recipes with invalid costs in the available VPlans.
940
941 /// Create a check to \p Plan to see if the vector loop should be executed
942 /// based on its trip count.
943 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
944 ElementCount MinProfitableTripCount) const;
945
946 /// Attach the runtime checks of \p RTChecks to \p Plan.
947 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
948 bool HasBranchWeights) const;
949
950 /// Update loop metadata and profile info for both the scalar remainder loop
951 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
952 /// loop on the vector loop and replaces vectorizer-specific metadata. The
953 /// loop ID of the original loop \p OrigLoopID must be passed, together with
954 /// the average trip count and invocation weight of the original loop (\p
955 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
956 /// cannot be retrieved after the plan has been executed, as the original loop
957 /// may have been removed.
959 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
960 bool VectorizingEpilogue, MDNode *OrigLoopID,
961 std::optional<unsigned> OrigAverageTripCount,
962 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
963 bool DisableRuntimeUnroll);
964
965private:
966 /// Build an initial VPlan, with HCFG wrapping the original scalar loop and
967 /// scalar transformations applied. Returns null if an initial VPlan cannot
968 /// be built.
969 VPlanPtr tryToBuildVPlan1();
970
971 /// Build a VPlan using VPRecipes according to the information gathered by
972 /// Legal and VPlan-based analysis. For outer loops, performs basic recipe
973 /// conversion only. For inner loops, \p Range's largest included VF is
974 /// restricted to the maximum VF the returned VPlan is valid for. If no VPlan
975 /// can be built for the input range, set the largest included VF to the
976 /// maximum VF for which no plan could be built. Each VPlan is built starting
977 /// from a copy of \p InitialPlan, which is a plain CFG VPlan wrapping the
978 /// original scalar loop.
979 VPlanPtr tryToBuildVPlan(VPlanPtr InitialPlan, VFRange &Range);
980
981 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
982 /// based on \p VPlan1 and according to the information gathered by Legal
983 /// when it checked if it is legal to vectorize the loop.
984 void buildVPlans(VPlan &VPlan1, ElementCount MinVF, ElementCount MaxVF);
985
986 /// Add ComputeReductionResult recipes to the middle block to compute the
987 /// final reduction results. Add Select recipes to the latch block when
988 /// folding tail, to feed ComputeReductionResult with the last or penultimate
989 /// iteration values according to the header mask.
990 void addReductionResultComputation(VPlanPtr &Plan,
991 VPRecipeBuilder &RecipeBuilder,
992 ElementCount MinVF);
993
994 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
995 /// that of B.
996 bool isMoreProfitable(const VectorizationFactor &A,
997 const VectorizationFactor &B, bool HasTail,
998 bool IsEpilogue = false) const;
999
1000 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
1001 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
1002 bool isMoreProfitable(const VectorizationFactor &A,
1003 const VectorizationFactor &B,
1004 const unsigned MaxTripCount, bool HasTail,
1005 bool IsEpilogue = false) const;
1006
1007 /// Determines if we have the infrastructure to vectorize the loop and its
1008 /// epilogue, assuming the main loop is vectorized by \p MainPlan.
1009 bool isCandidateForEpilogueVectorization(VPlan &MainPlan) const;
1010};
1011
1012} // namespace llvm
1013
1014#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:151
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:291
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, EpilogueVectorizationKind EpilogueVecKind=EpilogueVectorizationKind::None)
EpilogueVectorizationKind
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
@ MainLoop
Vectorizing the main loop of epilogue vectorization.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1680
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1731
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks, bool HasBranchWeights) const
Attach the runtime checks of RTChecks to Plan.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, VFSelectionContext &Config, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1666
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1837
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
std::unique_ptr< VPlan > selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC)
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
std::pair< VectorizationFactor, VPlan * > computeBestVF()
Compute and return the most profitable vectorization factor and the corresponding best VPlan.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
Holds state needed to make cost decisions before computing costs per-VF, including the maximum VFs.
const bool OptForSize
Whether this loop should be optimized for size based on function attribute or profile information.
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
bool isInLoopReduction(PHINode *Phi) const
Returns true if the Phi is part of an inloop reduction.
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
const TTI::TargetCostKind CostKind
The kind of cost that we are calculating.
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
const SmallPtrSetImpl< PHINode * > & getInLoopReductions() const
Returns the set of in-loop reduction PHIs.
std::optional< unsigned > getMaxSafeElements() const
Return maximum safe number of elements to be processed per vector iteration, which do not prevent sto...
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
const MapVector< Instruction *, uint64_t > & getMinimalBitwidths() const
VFSelectionContext(const TargetTransformInfo &TTI, const LoopVectorizationLegality *Legal, const Loop *TheLoop, const Function &F, PredicatedScalarEvolution &PSE, DemandedBits *DB, OptimizationRemarkEmitter *ORE, const LoopVectorizeHints *Hints, bool OptForSize)
Instruction * getInLoopReductionImmediateChain(Instruction *I) const
Returns the immediate chain operand of in-loop reduction operation I, or nullptr if I is not an in-lo...
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
std::optional< unsigned > getVScaleForTuning() const
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4404
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4431
iterator end()
Definition VPlan.h:4441
VPlan * getPlan()
Definition VPlan.cpp:211
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
T * insert(T *R)
Insert R at the current insertion point. Returns R unchanged.
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPVectorPointerRecipe * createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1653
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPValue * createScalarFreeze(VPValue *Op, Type *ResultTy, DebugLoc DL)
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLastActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder(VPRecipeBase *InsertPt)
VPWidenMemIntrinsicRecipe * createWidenMemIntrinsic(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
static VPSingleDefRecipe * createSingleScalarOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPValue *Mask, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL, Instruction *UV)
Create a single-scalar recipe with Opcode and Operands without inserting it.
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4182
Recipe to expand a SCEV expression.
Definition VPlan.h:4014
Class to record and manage LLVM IR flags.
Definition VPlan.h:695
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1171
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1539
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
@ VScale
Returns the value for vscale.
Definition VPlan.h:1348
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
VPBasicBlock * getParent()
Definition VPlan.h:477
Helper class to create VPRecipies from IR instructions.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3398
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4249
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:609
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
Definition VPlan.h:2348
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1878
A recipe for widening vector memory intrinsics.
Definition VPlan.h:2054
A recipe for widened phis.
Definition VPlan.h:2754
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition VPlan.h:4762
const DataLayout & getDataLayout() const
Definition VPlan.h:4967
LLVMContext & getContext() const
Definition VPlan.h:4963
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5069
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop, ElementCount VFWidth, unsigned IC)
Report successful vectorization of the loop.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
cl::opt< unsigned > ForceTargetInstructionCost
TargetTransformInfo TTI
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:74
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
A struct that represents some properties of the register usage of a loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)