LLVM 23.0.0git
LoopVectorizationPlanner.h
Go to the documentation of this file.
1//===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file provides a LoopVectorizationPlanner class.
11/// InnerLoopVectorizer vectorizes loops which contain only one basic block.
12/// LoopVectorizationPlanner - drives the vectorization process after having
13/// passed Legality checks.
14/// The planner builds and optimizes the Vectorization Plans which record the
15/// decisions on how to vectorize the given loop. In particular, the plans
16/// represent the control-flow of the vectorized version, the replication of
17/// instructions that are to be scalarized, and interleave access groups.
18///
19/// Also provides a VPlan-based builder utility analogous to IRBuilder.
20/// It provides an instruction-level API for generating VPInstructions while
21/// abstracting away the Recipe manipulation details.
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
25#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
26
27#include "VPlan.h"
28#include "llvm/ADT/SmallSet.h"
31
32namespace {
33class GeneratedRTChecks;
34}
35
36namespace llvm {
37
38class LoopInfo;
39class DominatorTree;
45class LoopVersioning;
48class VPRecipeBuilder;
49struct VPRegisterUsage;
50struct VFRange;
51
55
56/// \return An upper bound for vscale based on TTI or the vscale_range
57/// attribute.
58std::optional<unsigned> getMaxVScale(const Function &F,
60
61// Utility functions that are used by different vectorization classes
63
64/// Reports a vectorization failure: print \p DebugMsg for debugging
65/// purposes along with the corresponding optimization remark \p RemarkName.
66/// If \p I is passed, it is an instruction that prevents vectorization.
67/// Otherwise, the loop \p TheLoop is used for the location of the remark.
68void reportVectorizationFailure(const StringRef DebugMsg,
69 const StringRef OREMsg, const StringRef ORETag,
71 const Loop *TheLoop, Instruction *I = nullptr);
72
73/// Same as above, but the debug message and optimization remark are identical
74inline void reportVectorizationFailure(const StringRef DebugMsg,
75 const StringRef ORETag,
77 const Loop *TheLoop,
78 Instruction *I = nullptr) {
79 reportVectorizationFailure(DebugMsg, DebugMsg, ORETag, ORE, TheLoop, I);
80}
81
82/// Reports an informative message: print \p Msg for debugging purposes as well
83/// as an optimization remark. Uses either \p I as location of the remark, or
84/// otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
85/// remark.
86void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag,
88 const Loop *TheLoop, Instruction *I = nullptr,
89 DebugLoc DL = {});
90
91/// Report successful vectorization of the loop. In case an outer loop is
92/// vectorized, prepend "outer" to the vectorization remark.
93void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
94 ElementCount VFWidth, unsigned IC);
95
96} // namespace LoopVectorizationUtils
97
98/// VPlan-based builder utility analogous to IRBuilder.
99class VPBuilder {
100 VPBasicBlock *BB = nullptr;
102
103 /// Insert \p VPI in BB at InsertPt if BB is set.
104 template <typename T> T *tryInsertInstruction(T *R) {
105 if (BB)
106 BB->insert(R, InsertPt);
107 return R;
108 }
109
110 VPInstruction *createInstruction(unsigned Opcode,
111 ArrayRef<VPValue *> Operands,
112 const VPIRMetadata &MD, DebugLoc DL,
113 const Twine &Name = "") {
114 return tryInsertInstruction(
115 new VPInstruction(Opcode, Operands, {}, MD, DL, Name));
116 }
117
118public:
119 VPlan &getPlan() const {
120 assert(getInsertBlock() && "Insert block must be set");
121 return *getInsertBlock()->getPlan();
122 }
123
124 VPBuilder() = default;
125 VPBuilder(VPBasicBlock *InsertBB) { setInsertPoint(InsertBB); }
126 VPBuilder(VPRecipeBase *InsertPt) { setInsertPoint(InsertPt); }
128 setInsertPoint(TheBB, IP);
129 }
130
131 /// Clear the insertion point: created instructions will not be inserted into
132 /// a block.
134 BB = nullptr;
135 InsertPt = VPBasicBlock::iterator();
136 }
137
138 VPBasicBlock *getInsertBlock() const { return BB; }
139 VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
140
141 /// Create a VPBuilder to insert after \p R.
143 VPBuilder B;
144 B.setInsertPoint(R->getParent(), std::next(R->getIterator()));
145 return B;
146 }
147
148 /// InsertPoint - A saved insertion point.
150 VPBasicBlock *Block = nullptr;
152
153 public:
154 /// Creates a new insertion point which doesn't point to anything.
155 VPInsertPoint() = default;
156
157 /// Creates a new insertion point at the given location.
159 : Block(InsertBlock), Point(InsertPoint) {}
160
161 /// Returns true if this insert point is set.
162 bool isSet() const { return Block != nullptr; }
163
164 VPBasicBlock *getBlock() const { return Block; }
165 VPBasicBlock::iterator getPoint() const { return Point; }
166 };
167
168 /// Sets the current insert point to a previously-saved location.
170 if (IP.isSet())
171 setInsertPoint(IP.getBlock(), IP.getPoint());
172 else
174 }
175
176 /// This specifies that created VPInstructions should be appended to the end
177 /// of the specified block.
179 assert(TheBB && "Attempting to set a null insert point");
180 BB = TheBB;
181 InsertPt = BB->end();
182 }
183
184 /// This specifies that created instructions should be inserted at the
185 /// specified point.
187 BB = TheBB;
188 InsertPt = IP;
189 }
190
191 /// This specifies that created instructions should be inserted at the
192 /// specified point.
194 BB = IP->getParent();
195 InsertPt = IP->getIterator();
196 }
197
198 /// Insert \p R at the current insertion point. Returns \p R unchanged.
199 template <typename T> [[maybe_unused]] T *insert(T *R) {
200 BB->insert(R, InsertPt);
201 return R;
202 }
203
204 /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
205 /// its underlying Instruction.
207 Instruction *Inst = nullptr,
208 const VPIRFlags &Flags = {},
209 const VPIRMetadata &MD = {},
211 const Twine &Name = "",
212 Type *ResultTy = nullptr) {
213 VPInstruction *NewVPInst = tryInsertInstruction(
214 new VPInstruction(Opcode, Operands, Flags, MD, DL, Name, ResultTy));
215 NewVPInst->setUnderlyingValue(Inst);
216 return NewVPInst;
217 }
219 DebugLoc DL, const Twine &Name = "") {
220 return createInstruction(Opcode, Operands, {}, DL, Name);
221 }
223 const VPIRFlags &Flags,
225 const Twine &Name = "") {
226 return tryInsertInstruction(
227 new VPInstruction(Opcode, Operands, Flags, {}, DL, Name));
228 }
229
231 Type *ResultTy, const VPIRFlags &Flags = {},
233 const Twine &Name = "") {
234 return tryInsertInstruction(new VPInstructionWithType(
235 Opcode, Operands, ResultTy, Flags, {}, DL, Name));
236 }
237
240 const Twine &Name = "") {
241 // Assume that the maximum possible number of elements in a vector fits
242 // within the index type for the default address space.
243 VPlan &Plan = getPlan();
244 Type *IndexTy = Plan.getDataLayout().getIndexType(Plan.getContext(), 0);
245 return tryInsertInstruction(new VPInstruction(
246 VPInstruction::FirstActiveLane, Masks, {}, {}, DL, Name, IndexTy));
247 }
248
251 const Twine &Name = "") {
252 // Assume that the maximum possible number of elements in a vector fits
253 // within the index type for the default address space.
254 VPlan &Plan = getPlan();
255 Type *IndexTy = Plan.getDataLayout().getIndexType(Plan.getContext(), 0);
256 return tryInsertInstruction(new VPInstruction(
257 VPInstruction::LastActiveLane, Masks, {}, {}, DL, Name, IndexTy));
258 }
259
261 unsigned Opcode, ArrayRef<VPValue *> Operands,
262 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false},
263 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "") {
264 return tryInsertInstruction(
265 new VPInstruction(Opcode, Operands, WrapFlags, {}, DL, Name));
266 }
267
270 const Twine &Name = "") {
271 return createInstruction(VPInstruction::Not, {Operand}, {}, DL, Name);
272 }
273
276 const Twine &Name = "") {
277 return createInstruction(Instruction::BinaryOps::And, {LHS, RHS}, {}, DL,
278 Name);
279 }
280
283 const Twine &Name = "") {
284
285 return tryInsertInstruction(new VPInstruction(
286 Instruction::BinaryOps::Or, {LHS, RHS},
287 VPRecipeWithIRFlags::DisjointFlagsTy(false), {}, DL, Name));
288 }
289
292 const Twine &Name = "",
293 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
294 return createOverflowingOp(Instruction::Add, {LHS, RHS}, WrapFlags, DL,
295 Name);
296 }
297
298 VPInstruction *
300 const Twine &Name = "",
301 VPRecipeWithIRFlags::WrapFlagsTy WrapFlags = {false, false}) {
302 return createOverflowingOp(Instruction::Sub, {LHS, RHS}, WrapFlags, DL,
303 Name);
304 }
305
311
317
319 VPValue *FalseVal,
321 const Twine &Name = "",
322 const VPIRFlags &Flags = {}) {
323 return tryInsertInstruction(new VPInstruction(
324 Instruction::Select, {Cond, TrueVal, FalseVal}, Flags, {}, DL, Name));
325 }
326
327 /// Create a new ICmp VPInstruction with predicate \p Pred and operands \p A
328 /// and \p B.
331 const Twine &Name = "") {
333 Pred <= CmpInst::LAST_ICMP_PREDICATE && "invalid predicate");
334 return tryInsertInstruction(
335 new VPInstruction(Instruction::ICmp, {A, B}, Pred, {}, DL, Name));
336 }
337
338 /// Create a new FCmp VPInstruction with predicate \p Pred and operands \p A
339 /// and \p B.
342 const Twine &Name = "") {
344 Pred <= CmpInst::LAST_FCMP_PREDICATE && "invalid predicate");
345 return tryInsertInstruction(
346 new VPInstruction(Instruction::FCmp, {A, B},
347 VPIRFlags(Pred, FastMathFlags()), {}, DL, Name));
348 }
349
350 /// Create an AnyOf reduction pattern: or-reduce \p ChainOp, freeze the
351 /// result, then select between \p TrueVal and \p FalseVal.
353 VPValue *FalseVal,
355
358 const Twine &Name = "") {
359 return createNoWrapPtrAdd(Ptr, Offset, GEPNoWrapFlags::none(), DL, Name);
360 }
361
363 GEPNoWrapFlags GEPFlags,
365 const Twine &Name = "") {
366 return tryInsertInstruction(new VPInstruction(
367 VPInstruction::PtrAdd, {Ptr, Offset}, GEPFlags, {}, DL, Name));
368 }
369
372 const Twine &Name = "") {
373 return tryInsertInstruction(
375 GEPNoWrapFlags::none(), {}, DL, Name));
376 }
377
380 const Twine &Name = "", const VPIRFlags &Flags = {},
381 Type *ResultTy = nullptr) {
382 return tryInsertInstruction(
383 new VPPhi(IncomingValues, Flags, DL, Name, ResultTy));
384 }
385
388 const Twine &Name = "") {
389 return tryInsertInstruction(new VPWidenPHIRecipe(IncomingValues, DL, Name));
390 }
391
393 VPlan &Plan = *getInsertBlock()->getPlan();
394 VPValue *RuntimeEC = Plan.getConstantInt(Ty, EC.getKnownMinValue());
395 if (EC.isScalable()) {
396 VPValue *VScale = createVScale(Ty);
397 RuntimeEC = EC.getKnownMinValue() == 1
398 ? VScale
399 : createOverflowingOp(Instruction::Mul,
400 {VScale, RuntimeEC}, {true, false});
401 }
402 return RuntimeEC;
403 }
404
405 /// Convert the input value \p Current to the corresponding value of an
406 /// induction with \p Start and \p Step values, using \p Start + \p Current *
407 /// \p Step.
409 FPMathOperator *FPBinOp, VPIRValue *Start,
410 VPValue *Current, VPValue *Step) {
411 return tryInsertInstruction(
412 new VPDerivedIVRecipe(Kind, FPBinOp, Start, Current, Step));
413 }
414
416 DebugLoc DL,
417 const VPIRMetadata &Metadata = {}) {
418 return tryInsertInstruction(new VPInstructionWithType(
419 Instruction::Load, Addr, ResultTy, {}, Metadata, DL));
420 }
421
423 Type *ResultTy, DebugLoc DL,
424 const VPIRMetadata &Metadata = {}) {
425 return tryInsertInstruction(new VPInstructionWithType(
426 Opcode, Op, ResultTy, VPIRFlags::getDefaultFlags(Opcode), Metadata,
427 DL));
428 }
429
431 Type *ResultTy, DebugLoc DL,
432 const VPIRFlags &Flags,
433 const VPIRMetadata &Metadata = {}) {
434 return tryInsertInstruction(
435 new VPInstructionWithType(Opcode, Op, ResultTy, Flags, Metadata, DL));
436 }
437
438 /// Create a VScale VPInstruction.
441 return createNaryOp(VPInstruction::VScale, {}, ResultTy, {}, DL);
442 }
443
445 DebugLoc DL) {
446 if (ResultTy == SrcTy)
447 return Op;
448 Instruction::CastOps CastOp =
449 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
450 ? Instruction::Trunc
451 : Instruction::ZExt;
452 return createScalarCast(CastOp, Op, ResultTy, DL);
453 }
454
456 DebugLoc DL) {
457 if (ResultTy == SrcTy)
458 return Op;
459 Instruction::CastOps CastOp =
460 ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
461 ? Instruction::Trunc
462 : Instruction::SExt;
463 return createScalarCast(CastOp, Op, ResultTy, DL);
464 }
465
467 return tryInsertInstruction(
468 new VPInstruction(Instruction::Freeze, Op, {}, {}, DL));
469 }
470
472 Type *ResultTy) {
473 return tryInsertInstruction(new VPWidenCastRecipe(
474 Opcode, Op, ResultTy, nullptr, VPIRFlags::getDefaultFlags(Opcode)));
475 }
476
477 /// Create a single-scalar recipe with \p Opcode and \p Operands without
478 /// inserting it.
480 ArrayRef<VPValue *> Operands,
481 VPValue *Mask,
482 const VPIRFlags &Flags,
483 const VPIRMetadata &Metadata,
484 DebugLoc DL, Instruction *UV) {
485 if (Instruction::isCast(Opcode)) {
486 assert(!Mask && "Cast cannot be predicated");
487 return new VPInstructionWithType(Opcode, Operands, UV->getType(), Flags,
488 Metadata, DL, UV->getName(), UV);
489 }
490 return new VPReplicateRecipe(UV, Operands, /*IsSingleScalar=*/true, Mask,
491 Flags, Metadata, DL);
492 }
493
496 FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
497 VPValue *VF, DebugLoc DL) {
498 return tryInsertInstruction(new VPScalarIVStepsRecipe(
499 IV, Step, VF, InductionOpcode,
500 FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags(), DL));
501 }
502
504 return tryInsertInstruction(new VPExpandSCEVRecipe(Expr));
505 }
506
508 createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
509 GEPNoWrapFlags GEPFlags, DebugLoc DL) {
510 return tryInsertInstruction(
511 new VPVectorPointerRecipe(Ptr, SourceElementTy, Stride, GEPFlags, DL));
512 }
513
515 Intrinsic::ID VectorIntrinsicID, ArrayRef<VPValue *> CallArguments,
516 Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL) {
517 return tryInsertInstruction(new VPWidenMemIntrinsicRecipe(
518 VectorIntrinsicID, CallArguments, Ty, Alignment, MD, DL));
519 }
520
521 //===--------------------------------------------------------------------===//
522 // RAII helpers.
523 //===--------------------------------------------------------------------===//
524
525 /// RAII object that stores the current insertion point and restores it when
526 /// the object is destroyed.
528 VPBuilder &Builder;
529 VPBasicBlock *Block;
531
532 public:
534 : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
535
538
539 ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
540 };
541};
542
543/// TODO: The following VectorizationFactor was pulled out of
544/// LoopVectorizationCostModel class. LV also deals with
545/// VectorizerParams::VectorizationFactor.
546/// We need to streamline them.
547
548/// Information about vectorization costs.
550 /// Vector width with best cost.
552
553 /// Cost of the loop with that width.
555
556 /// Cost of the scalar loop.
558
559 /// The minimum trip count required to make vectorization profitable, e.g. due
560 /// to runtime checks.
562
566
567 /// Width 1 means no vectorization, cost 0 means uncomputed cost.
569 return {ElementCount::getFixed(1), 0, 0};
570 }
571
572 bool operator==(const VectorizationFactor &rhs) const {
573 return Width == rhs.Width && Cost == rhs.Cost;
574 }
575
576 bool operator!=(const VectorizationFactor &rhs) const {
577 return !(*this == rhs);
578 }
579};
580
581/// A class that represents two vectorization factors (initialized with 0 by
582/// default). One for fixed-width vectorization and one for scalable
583/// vectorization. This can be used by the vectorizer to choose from a range of
584/// fixed and/or scalable VFs in order to find the most cost-effective VF to
585/// vectorize with.
589
591 : FixedVF(ElementCount::getFixed(0)),
592 ScalableVF(ElementCount::getScalable(0)) {}
594 *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
595 }
599 assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
600 "Invalid scalable properties");
601 }
602
604
605 /// \return true if either fixed- or scalable VF is non-zero.
606 explicit operator bool() const { return FixedVF || ScalableVF; }
607
608 /// \return true if either fixed- or scalable VF is a valid vector VF.
609 bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
610};
611
612/// Holds state needed to make cost decisions before computing costs per-VF,
613/// including the maximum VFs.
615 /// \return True if maximizing vector bandwidth is enabled by the target or
616 /// user options, for the given register kind (scalable or fixed-width).
617 bool useMaxBandwidth(bool IsScalable) const;
618
619 /// \return the maximized element count based on the target's vector
620 /// registers and the loop trip-count, but limited to a maximum safe VF.
621 /// This is a helper function of computeFeasibleMaxVF.
622 ElementCount getMaximizedVFForTarget(unsigned MaxTripCount,
623 unsigned SmallestType,
624 unsigned WidestType,
625 ElementCount MaxSafeVF, unsigned UserIC,
626 bool FoldTailByMasking,
627 bool RequiresScalarEpilogue);
628
629 /// If \p VF * \p UserIC > MaxTripCount, clamps VF to the next lower VF
630 /// that results in VF * UserIC <= MaxTripCount.
631 ElementCount clampVFByMaxTripCount(ElementCount VF, unsigned MaxTripCount,
632 unsigned UserIC, bool FoldTailByMasking,
633 bool RequiresScalarEpilogue) const;
634
635 /// Checks if scalable vectorization is supported and enabled. Caches the
636 /// result to avoid repeated debug dumps for repeated queries.
637 bool isScalableVectorizationAllowed();
638
639 /// \return the maximum legal scalable VF, based on the safe max number
640 /// of elements.
641 ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
642
643 /// Initializes the value of vscale used for tuning the cost model. If
644 /// vscale_range.min == vscale_range.max then return vscale_range.max, else
645 /// return the value returned by the corresponding TTI method.
646 void initializeVScaleForTuning();
647
648 const TargetTransformInfo &TTI;
649 const LoopVectorizationLegality *Legal;
650 const Loop *TheLoop;
651 const Function &F;
653 DemandedBits *DB;
655 const LoopVectorizeHints *Hints;
656
657 /// Cached result of isScalableVectorizationAllowed.
658 std::optional<bool> IsScalableVectorizationAllowed;
659
660 /// Used to store the value of vscale used for tuning the cost model. It is
661 /// initialized during object construction.
662 std::optional<unsigned> VScaleForTuning;
663
664 /// The highest VF possible for this loop, without using MaxBandwidth.
665 FixedScalableVFPair MaxPermissibleVFWithoutMaxBW;
666
667 /// All element types found in the loop.
668 SmallPtrSet<Type *, 16> ElementTypesInLoop;
669
670 /// PHINodes of the reductions that should be expanded in-loop. Set by
671 /// collectInLoopReductions.
672 SmallPtrSet<PHINode *, 4> InLoopReductions;
673
674 /// A Map of inloop reduction operations and their immediate chain operand.
675 /// FIXME: This can be removed once reductions can be costed correctly in
676 /// VPlan. This was added to allow quick lookup of the inloop operations.
677 /// Set by collectInLoopReductions.
678 DenseMap<Instruction *, Instruction *> InLoopReductionImmediateChains;
679
680 /// Maximum safe number of elements to be processed per vector iteration,
681 /// which do not prevent store-load forwarding and are safe with regard to the
682 /// memory dependencies. Required for EVL-based vectorization, where this
683 /// value is used as the upper bound of the safe AVL. Set by
684 /// computeFeasibleMaxVF.
685 std::optional<unsigned> MaxSafeElements;
686
687 /// Map of scalar integer values to the smallest bitwidth they can be legally
688 /// represented as. The vector equivalents of these values should be truncated
689 /// to this type.
691
692public:
693 /// The kind of cost that we are calculating.
695
696 /// Whether this loop should be optimized for size based on function attribute
697 /// or profile information.
698 const bool OptForSize;
699
701 const LoopVectorizationLegality *Legal,
702 const Loop *TheLoop, const Function &F,
705 const LoopVectorizeHints *Hints, bool OptForSize)
706 : TTI(TTI), Legal(Legal), TheLoop(TheLoop), F(F), PSE(PSE), DB(DB),
707 ORE(ORE), Hints(Hints),
708 CostKind(F.hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput),
710 initializeVScaleForTuning();
711 }
712
713 /// \return The vscale value used for tuning the cost model.
714 std::optional<unsigned> getVScaleForTuning() const { return VScaleForTuning; }
715
716 /// \return True if register pressure should be considered for the given VF.
718
719 /// \return True if scalable vectors are supported by the target or forced.
720 bool supportsScalableVectors() const;
721
722 /// Collect element types in the loop that need widening.
724 const SmallPtrSetImpl<const Value *> *ValuesToIgnore = nullptr);
725
726 /// \return The size (in bits) of the smallest and widest types in the code
727 /// that need to be vectorized. We ignore values that remain scalar such as
728 /// 64 bit loop indices.
729 std::pair<unsigned, unsigned> getSmallestAndWidestTypes() const;
730
731 /// \return An upper bound for the vectorization factors for both
732 /// fixed and scalable vectorization, where the minimum-known number of
733 /// elements is a power-of-2 larger than zero. If scalable vectorization is
734 /// disabled or unsupported, then the scalable part will be equal to
735 /// ElementCount::getScalable(0). Also sets MaxSafeElements.
736 FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount,
737 ElementCount UserVF, unsigned UserIC,
738 bool FoldTailByMasking,
739 bool RequiresScalarEpilogue);
740
741 /// Return maximum safe number of elements to be processed per vector
742 /// iteration, which do not prevent store-load forwarding and are safe with
743 /// regard to the memory dependencies. Required for EVL-based VPlans to
744 /// correctly calculate AVL (application vector length) as min(remaining AVL,
745 /// MaxSafeElements). Set by computeFeasibleMaxVF.
746 /// TODO: need to consider adjusting cost model to use this value as a
747 /// vectorization factor for EVL-based vectorization.
748 std::optional<unsigned> getMaxSafeElements() const { return MaxSafeElements; }
749
750 /// Returns true if we should use strict in-order reductions for the given
751 /// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
752 /// the IsOrdered flag of RdxDesc is set and we do not allow reordering
753 /// of FP operations.
754 bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const;
755
756 /// Returns true if the target machine supports masked loads or stores
757 /// for \p I's data type and alignment. The caller must ensure the access is
758 /// consecutive or part of an interleave group.
760
761 /// Returns true if the target machine can represent \p V as a masked gather
762 /// or scatter operation.
763 bool isLegalGatherOrScatter(Value *V, ElementCount VF) const;
764
765 /// Split reductions into those that happen in the loop, and those that
766 /// happen outside. In-loop reductions are collected into InLoopReductions.
767 /// InLoopReductionImmediateChains is filled with each in-loop reduction
768 /// operation and its immediate chain operand for use during cost modelling.
770
771 /// Returns true if the Phi is part of an inloop reduction.
772 bool isInLoopReduction(PHINode *Phi) const {
773 return InLoopReductions.contains(Phi);
774 }
775
776 /// Returns the set of in-loop reduction PHIs.
778 return InLoopReductions;
779 }
780
781 /// Returns the immediate chain operand of in-loop reduction operation \p I,
782 /// or nullptr if \p I is not an in-loop reduction operation.
784 return InLoopReductionImmediateChains.lookup(I);
785 }
786
787 /// Check whether vectorization would require runtime checks. When optimizing
788 /// for size, returning true here aborts vectorization.
790
791 /// Returns a scalable VF to use for outer-loop vectorization if the target
792 /// supports it and a fixed VF otherwise.
794
795 /// Compute smallest bitwidth each instruction can be represented with.
796 /// The vector equivalents of these instructions should be truncated to this
797 /// type.
799
800 /// \returns The smallest bitwidth each instruction can be represented with.
802 return MinBWs;
803 }
804};
805
806/// Planner drives the vectorization process after having passed
807/// Legality checks.
809 /// The loop that we evaluate.
810 Loop *OrigLoop;
811
812 /// Loop Info analysis.
813 LoopInfo *LI;
814
815 /// The dominator tree.
816 DominatorTree *DT;
817
818 /// Target Library Info.
819 const TargetLibraryInfo *TLI;
820
821 /// Target Transform Info.
822 const TargetTransformInfo &TTI;
823
824 /// The legality analysis.
826
827 /// The profitability analysis.
829
830 /// VF selection state independent of cost-modeling decisions.
831 VFSelectionContext &Config;
832
833 /// The interleaved access analysis.
835
837
838 const LoopVectorizeHints &Hints;
839
841
843
844 /// Profitable vector factors.
846
847 /// A builder used to construct the current plan.
848 VPBuilder Builder;
849
850 /// Computes the cost of \p Plan for vectorization factor \p VF.
851 ///
852 /// The current implementation requires access to the
853 /// LoopVectorizationLegality to handle inductions and reductions, which is
854 /// why it is kept separate from the VPlan-only cost infrastructure.
855 ///
856 /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has
857 /// been retired.
858 InstructionCost cost(VPlan &Plan, ElementCount VF, VPRegisterUsage *RU) const;
859
860 /// Precompute costs for certain instructions using the legacy cost model. The
861 /// function is used to bring up the VPlan-based cost model to initially avoid
862 /// taking different decisions due to inaccuracies in the legacy cost model.
863 InstructionCost precomputeCosts(VPlan &Plan, ElementCount VF,
864 VPCostContext &CostCtx) const;
865
866public:
868 Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
873 : OrigLoop(L), LI(LI), DT(DT), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM),
874 Config(Config), IAI(IAI), PSE(PSE), Hints(Hints), ORE(ORE) {}
875
876 /// Build VPlans for the specified \p UserVF and \p UserIC if they are
877 /// non-zero or all applicable candidate VFs otherwise. If vectorization and
878 /// interleaving should be avoided up-front, no plans are generated.
879 void plan(ElementCount UserVF, unsigned UserIC);
880
881 /// Return the VPlan for \p VF. At the moment, there is always a single VPlan
882 /// for each VF.
883 VPlan &getPlanFor(ElementCount VF) const;
884
885 /// Compute and return the most profitable vectorization factor and the
886 /// corresponding best VPlan. Also collect all profitable VFs in
887 /// ProfitableVFs.
888 std::pair<VectorizationFactor, VPlan *> computeBestVF();
889
890 /// \return The desired interleave count.
891 /// If interleave count has been specified by metadata it will be returned.
892 /// Otherwise, the interleave count is computed and returned. VF and LoopCost
893 /// are the selected vectorization factor and the cost of the selected VF.
894 unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF,
895 InstructionCost LoopCost);
896
897 /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan
898 /// according to the best selected \p VF and \p UF.
899 ///
900 /// TODO: \p EpilogueVecKind should be removed once the re-use issue has been
901 /// fixed.
902 ///
903 /// Returns a mapping of SCEVs to their expanded IR values.
904 /// Note that this is a temporary workaround needed due to the current
905 /// epilogue handling.
907 None, ///< Not part of epilogue vectorization.
908 MainLoop, ///< Vectorizing the main loop of epilogue vectorization.
909 Epilogue ///< Vectorizing the epilogue loop.
910 };
912 executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
914 EpilogueVectorizationKind EpilogueVecKind =
916
917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
918 void printPlans(raw_ostream &O);
919#endif
920
921 /// Look through the existing plans and return true if we have one with
922 /// vectorization factor \p VF.
924 return any_of(VPlans,
925 [&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
926 }
927
928 /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
929 /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
930 /// returned value holds for the entire \p Range.
931 static bool
932 getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
933 VFRange &Range);
934
935 /// \return A VPlan for the most profitable epilogue vectorization, with its
936 /// VF narrowed to the chosen factor. The returned plan is a duplicate.
937 /// Returns nullptr if epilogue vectorization is not supported or not
938 /// profitable for the loop.
939 std::unique_ptr<VPlan>
940 selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC);
941
942 /// Emit remarks for recipes with invalid costs in the available VPlans.
944
945 /// Add a check to \p Plan to see if the vector loop should be executed
946 /// based on its trip count.
947 void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF,
948 ElementCount MinProfitableTripCount) const;
949
950 /// Attach the runtime checks of \p RTChecks to \p Plan.
951 void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks,
952 bool HasBranchWeights) const;
953
954 /// Update loop metadata and profile info for both the scalar remainder loop
955 /// and \p VectorLoop, if it exists. Keeps all loop hints from the original
956 /// loop on the vector loop and replaces vectorizer-specific metadata. The
957 /// loop ID of the original loop \p OrigLoopID must be passed, together with
958 /// the average trip count and invocation weight of the original loop (\p
959 /// OrigAverageTripCount and \p OrigLoopInvocationWeight respectively). They
960 /// cannot be retrieved after the plan has been executed, as the original loop
961 /// may have been removed.
963 Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan,
964 bool VectorizingEpilogue, MDNode *OrigLoopID,
965 std::optional<unsigned> OrigAverageTripCount,
966 unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF,
967 bool DisableRuntimeUnroll);
968
969private:
970 /// Build an initial VPlan, with HCFG wrapping the original scalar loop and
971 /// scalar transformations applied. Returns null if an initial VPlan cannot
972 /// be built.
973 VPlanPtr tryToBuildVPlan1();
974
975 /// Build a VPlan using VPRecipes according to the information gathered by
976 /// Legal and VPlan-based analysis. For outer loops, performs basic recipe
977 /// conversion only. For inner loops, \p Range's largest included VF is
978 /// restricted to the maximum VF the returned VPlan is valid for. If no VPlan
979 /// can be built for the input range, set the largest included VF to the
980 /// maximum VF for which no plan could be built. Each VPlan is built starting
981 /// from a copy of \p InitialPlan, which is a plain CFG VPlan wrapping the
982 /// original scalar loop.
983 VPlanPtr tryToBuildVPlan(VPlanPtr InitialPlan, VFRange &Range);
984
985 /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
986 /// based on \p VPlan1 and according to the information gathered by Legal
987 /// when it checked if it is legal to vectorize the loop.
988 void buildVPlans(VPlan &VPlan1, ElementCount MinVF, ElementCount MaxVF);
989
990 /// Add ComputeReductionResult recipes to the middle block to compute the
991 /// final reduction results. Add Select recipes to the latch block when
992 /// folding tail, to feed ComputeReductionResult with the last or penultimate
993 /// iteration values according to the header mask.
994 void addReductionResultComputation(VPlanPtr &Plan,
995 VPRecipeBuilder &RecipeBuilder,
996 ElementCount MinVF);
997
998 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
999 /// that of B.
1000 bool isMoreProfitable(const VectorizationFactor &A,
1001 const VectorizationFactor &B, bool HasTail,
1002 bool IsEpilogue = false) const;
1003
1004 /// Returns true if the per-lane cost of VectorizationFactor A is lower than
1005 /// that of B in the context of vectorizing a loop with known \p MaxTripCount.
1006 bool isMoreProfitable(const VectorizationFactor &A,
1007 const VectorizationFactor &B,
1008 const unsigned MaxTripCount, bool HasTail,
1009 bool IsEpilogue = false) const;
1010
1011 /// Determines if we have the infrastructure to vectorize the loop and its
1012 /// epilogue, assuming the main loop is vectorized by \p MainPlan.
1013 bool isCandidateForEpilogueVectorization(VPlan &MainPlan) const;
1014};
1015
1016/// A helper function that returns true if the given type is irregular. The
1017/// type is irregular if its allocated size doesn't equal the store size of an
1018/// element of the corresponding vector type.
1019inline bool hasIrregularType(Type *Ty, const DataLayout &DL) {
1020 // Determine if an array of N elements of type Ty is "bitcast compatible"
1021 // with a <N x Ty> vector.
1022 // This is only true if there is no padding between the array elements.
1023 return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
1024}
1025
1026} // namespace llvm
1027
1028#endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
dxil translate DXIL Translate Metadata
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
const SmallVectorImpl< MachineOperand > & Cond
This file defines the SmallSet class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the Vectorization Plan base classes:
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
A debug info location.
Definition DebugLoc.h:126
static DebugLoc getUnknown()
Definition DebugLoc.h:153
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:291
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
Drive the analysis of interleaved memory accesses in the loop.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
DenseMap< const SCEV *, Value * > executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan, InnerLoopVectorizer &LB, DominatorTree *DT, EpilogueVectorizationKind EpilogueVecKind=EpilogueVectorizationKind::None)
EpilogueVectorizationKind
Generate the IR code for the vectorized loop captured in VPlan BestPlan according to the best selecte...
@ MainLoop
Vectorizing the main loop of epilogue vectorization.
VPlan & getPlanFor(ElementCount VF) const
Return the VPlan for VF.
Definition VPlan.cpp:1660
void updateLoopMetadataAndProfileInfo(Loop *VectorLoop, VPBasicBlock *HeaderVPBB, const VPlan &Plan, bool VectorizingEpilogue, MDNode *OrigLoopID, std::optional< unsigned > OrigAverageTripCount, unsigned OrigLoopInvocationWeight, unsigned EstimatedVFxUF, bool DisableRuntimeUnroll)
Update loop metadata and profile info for both the scalar remainder loop and VectorLoop,...
Definition VPlan.cpp:1711
void attachRuntimeChecks(VPlan &Plan, GeneratedRTChecks &RTChecks, bool HasBranchWeights) const
Attach the runtime checks of RTChecks to Plan.
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, const TargetTransformInfo &TTI, LoopVectorizationLegality *Legal, LoopVectorizationCostModel &CM, VFSelectionContext &Config, InterleavedAccessInfo &IAI, PredicatedScalarEvolution &PSE, const LoopVectorizeHints &Hints, OptimizationRemarkEmitter *ORE)
unsigned selectInterleaveCount(VPlan &Plan, ElementCount VF, InstructionCost LoopCost)
void emitInvalidCostRemarks(OptimizationRemarkEmitter *ORE)
Emit remarks for recipes with invalid costs in the available VPlans.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Definition VPlan.cpp:1646
void printPlans(raw_ostream &O)
Definition VPlan.cpp:1817
void plan(ElementCount UserVF, unsigned UserIC)
Build VPlans for the specified UserVF and UserIC if they are non-zero or all applicable candidate VFs...
std::unique_ptr< VPlan > selectBestEpiloguePlan(VPlan &MainPlan, ElementCount MainLoopVF, unsigned IC)
void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount) const
Create a check to Plan to see if the vector loop should be executed based on its trip count.
bool hasPlanWithVF(ElementCount VF) const
Look through the existing plans and return true if we have one with vectorization factor VF.
std::pair< VectorizationFactor, VPlan * > computeBestVF()
Compute and return the most profitable vectorization factor and the corresponding best VPlan.
Utility class for getting and setting loop vectorizer hints in the form of loop metadata.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1069
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
Root of the metadata hierarchy.
Definition Metadata.h:64
The optimization diagnostic interface.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
Holds state needed to make cost decisions before computing costs per-VF, including the maximum VFs.
const bool OptForSize
Whether this loop should be optimized for size based on function attribute or profile information.
FixedScalableVFPair computeVPlanOuterloopVF(ElementCount UserVF)
Returns a scalable VF to use for outer-loop vectorization if the target supports it and a fixed VF ot...
bool isInLoopReduction(PHINode *Phi) const
Returns true if the Phi is part of an inloop reduction.
std::pair< unsigned, unsigned > getSmallestAndWidestTypes() const
const TTI::TargetCostKind CostKind
The kind of cost that we are calculating.
bool runtimeChecksRequired()
Check whether vectorization would require runtime checks.
bool isLegalGatherOrScatter(Value *V, ElementCount VF) const
Returns true if the target machine can represent V as a masked gather or scatter operation.
void collectInLoopReductions()
Split reductions into those that happen in the loop, and those that happen outside.
const SmallPtrSetImpl< PHINode * > & getInLoopReductions() const
Returns the set of in-loop reduction PHIs.
std::optional< unsigned > getMaxSafeElements() const
Return maximum safe number of elements to be processed per vector iteration, which do not prevent sto...
FixedScalableVFPair computeFeasibleMaxVF(unsigned MaxTripCount, ElementCount UserVF, unsigned UserIC, bool FoldTailByMasking, bool RequiresScalarEpilogue)
const MapVector< Instruction *, uint64_t > & getMinimalBitwidths() const
VFSelectionContext(const TargetTransformInfo &TTI, const LoopVectorizationLegality *Legal, const Loop *TheLoop, const Function &F, PredicatedScalarEvolution &PSE, DemandedBits *DB, OptimizationRemarkEmitter *ORE, const LoopVectorizeHints *Hints, bool OptForSize)
Instruction * getInLoopReductionImmediateChain(Instruction *I) const
Returns the immediate chain operand of in-loop reduction operation I, or nullptr if I is not an in-lo...
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const
Returns true if we should use strict in-order reductions for the given RdxDesc.
bool shouldConsiderRegPressureForVF(ElementCount VF) const
void collectElementTypesForWidening(const SmallPtrSetImpl< const Value * > *ValuesToIgnore=nullptr)
Collect element types in the loop that need widening.
bool isLegalMaskedLoadOrStore(Instruction *I, ElementCount VF) const
Returns true if the target machine supports masked loads or stores for I's data type and alignment.
std::optional< unsigned > getVScaleForTuning() const
void computeMinimalBitwidths()
Compute smallest bitwidth each instruction can be represented with.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4377
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4404
iterator end()
Definition VPlan.h:4414
VPlan * getPlan()
Definition VPlan.cpp:211
InsertPointGuard(const InsertPointGuard &)=delete
InsertPointGuard & operator=(const InsertPointGuard &)=delete
InsertPoint - A saved insertion point.
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
Creates a new insertion point at the given location.
VPBasicBlock::iterator getPoint() const
VPInsertPoint()=default
Creates a new insertion point which doesn't point to anything.
bool isSet() const
Returns true if this insert point is set.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createSub(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
This specifies that created instructions should be inserted at the specified point.
void setInsertPoint(VPRecipeBase *IP)
This specifies that created instructions should be inserted at the specified point.
VPValue * createElementCount(Type *Ty, ElementCount EC)
T * insert(T *R)
Insert R at the current insertion point. Returns R unchanged.
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createVScale(Type *ResultTy, DebugLoc DL=DebugLoc::getUnknown())
Create a VScale VPInstruction.
void restoreIP(VPInsertPoint IP)
Sets the current insert point to a previously-saved location.
VPVectorPointerRecipe * createVectorPointer(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
Definition VPlan.cpp:1633
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock * getInsertBlock() const
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPScalarIVStepsRecipe * createScalarIVSteps(Instruction::BinaryOps InductionOpcode, FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL)
VPBuilder(VPBasicBlock *InsertBB)
VPInstruction * createNoWrapPtrAdd(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags GEPFlags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createPtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
VPValue * createScalarFreeze(VPValue *Op, Type *ResultTy, DebugLoc DL)
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createLastActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder(VPRecipeBase *InsertPt)
VPWidenMemIntrinsicRecipe * createWidenMemIntrinsic(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD, DebugLoc DL)
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
void clearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata={})
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBuilder()=default
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPExpandSCEVRecipe * createExpandSCEV(const SCEV *Expr)
VPBuilder(VPBasicBlock *TheBB, VPBasicBlock::iterator IP)
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
static VPSingleDefRecipe * createSingleScalarOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPValue *Mask, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL, Instruction *UV)
Create a single-scalar recipe with Opcode and Operands without inserting it.
VPInstruction * createWidePtrAdd(VPValue *Ptr, VPValue *Offset, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:4175
Recipe to expand a SCEV expression.
Definition VPlan.h:4007
Class to record and manage LLVM IR flags.
Definition VPlan.h:695
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1171
A specialization of VPInstruction augmenting it with a dedicated result type, to be used when the opc...
Definition VPlan.h:1539
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
@ VScale
Returns the value for vscale.
Definition VPlan.h:1348
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
VPBasicBlock * getParent()
Definition VPlan.h:477
Helper class to create VPRecipes from IR instructions.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3385
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition VPlan.h:4235
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:609
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
Definition VPlan.h:2348
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1878
A recipe for widening vector memory intrinsics.
Definition VPlan.h:2054
A recipe for widened phis.
Definition VPlan.h:2741
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4735
const DataLayout & getDataLayout() const
Definition VPlan.h:4936
LLVMContext & getContext() const
Definition VPlan.h:4932
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5038
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
void reportVectorizationFailure(const StringRef DebugMsg, const StringRef OREMsg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr)
Reports a vectorization failure: print DebugMsg for debugging purposes along with the corresponding o...
void reportVectorizationInfo(const StringRef Msg, const StringRef ORETag, OptimizationRemarkEmitter *ORE, const Loop *TheLoop, Instruction *I=nullptr, DebugLoc DL={})
Reports an informative message: print Msg for debugging purposes as well as an optimization remark.
void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop, ElementCount VFWidth, unsigned IC)
Report successful vectorization of the loop.
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
bool hasIrregularType(Type *Ty, const DataLayout &DL)
A helper function that returns true if the given type is irregular.
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
cl::opt< unsigned > ForceTargetInstructionCost
TargetTransformInfo TTI
DWARFExpression::Operation Op
cl::opt< bool > EnableVPlanNativePath
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:74
cl::opt< bool > PreferInLoopReductions
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
A class that represents two vectorization factors (initialized with 0 by default).
FixedScalableVFPair(const ElementCount &FixedVF, const ElementCount &ScalableVF)
FixedScalableVFPair(const ElementCount &Max)
static FixedScalableVFPair getNone()
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:277
A struct that represents some properties of the register usage of a loop.
TODO: The following VectorizationFactor was pulled out of LoopVectorizationCostModel class.
InstructionCost Cost
Cost of the loop with that width.
ElementCount MinProfitableTripCount
The minimum trip count required to make vectorization profitable, e.g.
bool operator==(const VectorizationFactor &rhs) const
ElementCount Width
Vector width with best cost.
InstructionCost ScalarCost
Cost of the scalar loop.
bool operator!=(const VectorizationFactor &rhs) const
static VectorizationFactor Disabled()
Width 1 means no vectorization, cost 0 means uncomputed cost.
VectorizationFactor(ElementCount Width, InstructionCost Cost, InstructionCost ScalarCost)