LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPBlockBase to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
// Expands to the set of classof() overloads a recipe class needs for
// isa/cast/dyn_cast. Each overload reports whether its argument (a recipe, a
// VPDef, a VPUser, or a VPValue with a defining recipe) has VPDef ID VPDefID.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *Def = V->getDefiningRecipe())                                    \
      return Def->getVPDefID() == VPDefID;                                     \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *Recipe = dyn_cast<VPRecipeBase>(U))                              \
      return Recipe->getVPDefID() == VPDefID;                                  \
    return false;                                                              \
  }
527
528/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
529/// more output IR instructions that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPBlendSC:
561 case VPRecipeBase::VPPredInstPHISC:
562 case VPRecipeBase::VPCanonicalIVPHISC:
563 case VPRecipeBase::VPActiveLaneMaskPHISC:
564 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565 case VPRecipeBase::VPWidenPHISC:
566 case VPRecipeBase::VPWidenIntOrFpInductionSC:
567 case VPRecipeBase::VPWidenPointerInductionSC:
568 case VPRecipeBase::VPReductionPHISC:
569 return true;
570 case VPRecipeBase::VPBranchOnMaskSC:
571 case VPRecipeBase::VPInterleaveEVLSC:
572 case VPRecipeBase::VPInterleaveSC:
573 case VPRecipeBase::VPIRInstructionSC:
574 case VPRecipeBase::VPWidenLoadEVLSC:
575 case VPRecipeBase::VPWidenLoadSC:
576 case VPRecipeBase::VPWidenStoreEVLSC:
577 case VPRecipeBase::VPWidenStoreSC:
578 case VPRecipeBase::VPHistogramSC:
579 // TODO: Widened stores don't define a value, but widened loads do. Split
580 // the recipes to be able to make widened loads VPSingleDefRecipes.
581 return false;
582 }
583 llvm_unreachable("Unhandled VPDefID");
584 }
585
586 static inline bool classof(const VPUser *U) {
587 auto *R = dyn_cast<VPRecipeBase>(U);
588 return R && classof(R);
589 }
590
591 VPSingleDefRecipe *clone() override = 0;
592
593 /// Returns the underlying instruction.
600
601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
602 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
604#endif
605};
606
607/// Class to record and manage LLVM IR flags.
609 enum class OperationType : unsigned char {
610 Cmp,
611 FCmp,
612 OverflowingBinOp,
613 Trunc,
614 DisjointOp,
615 PossiblyExactOp,
616 GEPOp,
617 FPMathOp,
618 NonNegOp,
619 Other
620 };
621
622public:
623 struct WrapFlagsTy {
624 char HasNUW : 1;
625 char HasNSW : 1;
626
628 };
629
631 char HasNUW : 1;
632 char HasNSW : 1;
633
635 };
636
641
643 char NonNeg : 1;
644 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
645 };
646
647private:
648 struct ExactFlagsTy {
649 char IsExact : 1;
650 };
651 struct FastMathFlagsTy {
652 char AllowReassoc : 1;
653 char NoNaNs : 1;
654 char NoInfs : 1;
655 char NoSignedZeros : 1;
656 char AllowReciprocal : 1;
657 char AllowContract : 1;
658 char ApproxFunc : 1;
659
660 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
661 };
662 /// Holds both the predicate and fast-math flags for floating-point
663 /// comparisons.
664 struct FCmpFlagsTy {
666 FastMathFlagsTy FMFs;
667 };
668
669 OperationType OpType;
670
671 union {
676 ExactFlagsTy ExactFlags;
679 FastMathFlagsTy FMFs;
680 FCmpFlagsTy FCmpFlags;
681 unsigned AllFlags;
682 };
683
684public:
685 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
686
688 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
689 OpType = OperationType::FCmp;
690 FCmpFlags.Pred = FCmp->getPredicate();
691 FCmpFlags.FMFs = FCmp->getFastMathFlags();
692 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
693 OpType = OperationType::Cmp;
694 CmpPredicate = Op->getPredicate();
695 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
696 OpType = OperationType::DisjointOp;
697 DisjointFlags.IsDisjoint = Op->isDisjoint();
698 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
699 OpType = OperationType::OverflowingBinOp;
700 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
701 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
702 OpType = OperationType::Trunc;
703 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
704 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
705 OpType = OperationType::PossiblyExactOp;
706 ExactFlags.IsExact = Op->isExact();
707 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
708 OpType = OperationType::GEPOp;
709 GEPFlags = GEP->getNoWrapFlags();
710 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
711 OpType = OperationType::NonNegOp;
712 NonNegFlags.NonNeg = PNNI->hasNonNeg();
713 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
714 OpType = OperationType::FPMathOp;
715 FMFs = Op->getFastMathFlags();
716 } else {
717 OpType = OperationType::Other;
718 AllFlags = 0;
719 }
720 }
721
723 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
724
726 : OpType(OperationType::FCmp) {
727 FCmpFlags.Pred = Pred;
728 FCmpFlags.FMFs = FMFs;
729 }
730
732 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
733
735 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
736
737 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
738
740 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
741
743 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
744
746 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
747
749 OpType = Other.OpType;
750 AllFlags = Other.AllFlags;
751 }
752
753 /// Only keep flags also present in \p Other. \p Other must have the same
754 /// OpType as the current object.
755 void intersectFlags(const VPIRFlags &Other);
756
757 /// Drop all poison-generating flags.
759 // NOTE: This needs to be kept in-sync with
760 // Instruction::dropPoisonGeneratingFlags.
761 switch (OpType) {
762 case OperationType::OverflowingBinOp:
763 WrapFlags.HasNUW = false;
764 WrapFlags.HasNSW = false;
765 break;
766 case OperationType::Trunc:
767 TruncFlags.HasNUW = false;
768 TruncFlags.HasNSW = false;
769 break;
770 case OperationType::DisjointOp:
771 DisjointFlags.IsDisjoint = false;
772 break;
773 case OperationType::PossiblyExactOp:
774 ExactFlags.IsExact = false;
775 break;
776 case OperationType::GEPOp:
778 break;
779 case OperationType::FPMathOp:
780 case OperationType::FCmp:
781 getFMFsRef().NoNaNs = false;
782 getFMFsRef().NoInfs = false;
783 break;
784 case OperationType::NonNegOp:
785 NonNegFlags.NonNeg = false;
786 break;
787 case OperationType::Cmp:
788 case OperationType::Other:
789 break;
790 }
791 }
792
793 /// Apply the IR flags to \p I.
794 void applyFlags(Instruction &I) const {
795 switch (OpType) {
796 case OperationType::OverflowingBinOp:
797 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
798 I.setHasNoSignedWrap(WrapFlags.HasNSW);
799 break;
800 case OperationType::Trunc:
801 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
802 I.setHasNoSignedWrap(TruncFlags.HasNSW);
803 break;
804 case OperationType::DisjointOp:
805 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
806 break;
807 case OperationType::PossiblyExactOp:
808 I.setIsExact(ExactFlags.IsExact);
809 break;
810 case OperationType::GEPOp:
811 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
812 break;
813 case OperationType::FPMathOp:
814 case OperationType::FCmp: {
815 const FastMathFlagsTy &F = getFMFsRef();
816 I.setHasAllowReassoc(F.AllowReassoc);
817 I.setHasNoNaNs(F.NoNaNs);
818 I.setHasNoInfs(F.NoInfs);
819 I.setHasNoSignedZeros(F.NoSignedZeros);
820 I.setHasAllowReciprocal(F.AllowReciprocal);
821 I.setHasAllowContract(F.AllowContract);
822 I.setHasApproxFunc(F.ApproxFunc);
823 break;
824 }
825 case OperationType::NonNegOp:
826 I.setNonNeg(NonNegFlags.NonNeg);
827 break;
828 case OperationType::Cmp:
829 case OperationType::Other:
830 break;
831 }
832 }
833
835 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
836 "recipe doesn't have a compare predicate");
837 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
838 }
839
841 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
842 "recipe doesn't have a compare predicate");
843 if (OpType == OperationType::FCmp)
844 FCmpFlags.Pred = Pred;
845 else
846 CmpPredicate = Pred;
847 }
848
850
851 /// Returns true if the recipe has a comparison predicate.
852 bool hasPredicate() const {
853 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
854 }
855
856 /// Returns true if the recipe has fast-math flags.
857 bool hasFastMathFlags() const {
858 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
859 }
860
862
863 /// Returns true if the recipe has non-negative flag.
864 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
865
866 bool isNonNeg() const {
867 assert(OpType == OperationType::NonNegOp &&
868 "recipe doesn't have a NNEG flag");
869 return NonNegFlags.NonNeg;
870 }
871
872 bool hasNoUnsignedWrap() const {
873 switch (OpType) {
874 case OperationType::OverflowingBinOp:
875 return WrapFlags.HasNUW;
876 case OperationType::Trunc:
877 return TruncFlags.HasNUW;
878 default:
879 llvm_unreachable("recipe doesn't have a NUW flag");
880 }
881 }
882
883 bool hasNoSignedWrap() const {
884 switch (OpType) {
885 case OperationType::OverflowingBinOp:
886 return WrapFlags.HasNSW;
887 case OperationType::Trunc:
888 return TruncFlags.HasNSW;
889 default:
890 llvm_unreachable("recipe doesn't have a NSW flag");
891 }
892 }
893
894 bool isDisjoint() const {
895 assert(OpType == OperationType::DisjointOp &&
896 "recipe cannot have a disjoing flag");
897 return DisjointFlags.IsDisjoint;
898 }
899
900private:
901 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
902 FastMathFlagsTy &getFMFsRef() {
903 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
904 }
905 const FastMathFlagsTy &getFMFsRef() const {
906 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
907 }
908
909public:
910#if !defined(NDEBUG)
911 /// Returns true if the set flags are valid for \p Opcode.
912 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
913#endif
914
915#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
916 void printFlags(raw_ostream &O) const;
917#endif
918};
919
920/// A pure-virtual common base class for recipes defining a single VPValue and
921/// using IR flags.
923 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
924 const VPIRFlags &Flags,
926 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
927
928 static inline bool classof(const VPRecipeBase *R) {
929 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
930 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
935 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
936 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
937 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
938 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
939 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
940 }
941
942 static inline bool classof(const VPUser *U) {
943 auto *R = dyn_cast<VPRecipeBase>(U);
944 return R && classof(R);
945 }
946
947 static inline bool classof(const VPValue *V) {
948 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
949 return R && classof(R);
950 }
951
952 VPRecipeWithIRFlags *clone() override = 0;
953
954 static inline bool classof(const VPSingleDefRecipe *U) {
955 auto *R = dyn_cast<VPRecipeBase>(U);
956 return R && classof(R);
957 }
958
959 void execute(VPTransformState &State) override = 0;
960
961 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
963 VPCostContext &Ctx) const;
964};
965
966/// Helper to access the operand that contains the unroll part for this recipe
967/// after unrolling.
968template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
969protected:
970 /// Return the VPValue operand containing the unroll part or null if there is
971 /// no such operand.
972 VPValue *getUnrollPartOperand(const VPUser &U) const;
973
974 /// Return the unroll part.
975 unsigned getUnrollPart(const VPUser &U) const;
976};
977
978/// Helper to manage IR metadata for recipes. It filters out metadata that
979/// cannot be propagated.
982
983public:
984 VPIRMetadata() = default;
985
986 /// Adds metadata that can be preserved from the original instruction
987 /// \p I.
989
990 /// Copy constructor for cloning.
991 VPIRMetadata(const VPIRMetadata &Other) = default;
992
994
995 /// Add all metadata to \p I.
996 void applyMetadata(Instruction &I) const;
997
998 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
999 /// already exists, it will be replaced. Otherwise, it will be added.
1000 void setMetadata(unsigned Kind, MDNode *Node) {
1001 auto It =
1002 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1003 return P.first == Kind;
1004 });
1005 if (It != Metadata.end())
1006 It->second = Node;
1007 else
1008 Metadata.emplace_back(Kind, Node);
1009 }
1010
1011 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1012 /// nodes that are common to both.
1013 void intersect(const VPIRMetadata &MD);
1014
1015 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1016 MDNode *getMetadata(unsigned Kind) const {
1017 auto It =
1018 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1019 return It != Metadata.end() ? It->second : nullptr;
1020 }
1021
1022#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1023 /// Print metadata with node IDs.
1024 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1025#endif
1026};
1027
1028/// This is a concrete Recipe that models a single VPlan-level instruction.
1029/// While as any Recipe it may generate a sequence of IR instructions when
1030/// executed, these instructions would always form a single-def expression as
1031/// the VPInstruction is also a single def-use vertex.
1033 public VPIRMetadata,
1034 public VPUnrollPartAccessor<1> {
1035 friend class VPlanSlp;
1036
1037public:
1038 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1039 enum {
1041 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1042 // values of a first-order recurrence.
1046 // Creates a mask where each lane is active (true) whilst the current
1047 // counter (first operand + index) is less than the second operand. i.e.
1048 // mask[i] = icmp ult (op0 + i), op1
1049 // The size of the mask returned is VF * Multiplier (UF, third op).
1053 // Increment the canonical IV separately for each unrolled part.
1055 // Abstract instruction that compares two values and branches. This is
1056 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1059 // Branch with 2 boolean condition operands and 3 successors. If condition
1060 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1061 // successor 1; otherwise branches to successor 2. Expanded after region
1062 // dissolution into: (1) an OR of the two conditions branching to
1063 // middle.split or successor 2, and (2) middle.split branching to successor
1064 // 0 or successor 1 based on condition 0.
1067 /// Given operands of (the same) struct type, creates a struct of fixed-
1068 /// width vectors each containing a struct field of all operands. The
1069 /// number of operands matches the element count of every vector.
1071 /// Creates a fixed-width vector containing all operands. The number of
1072 /// operands matches the vector element count.
1074 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1075 /// abstract VPInstruction whose single defined VPValue represents VF
1076 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1077 /// VPInstructions.
1079 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1080 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1084 // Extracts the last part of its operand. Removed during unrolling.
1086 // Extracts the last lane of its vector operand, per part.
1088 // Extracts the second-to-last lane from its operand or the second-to-last
1089 // part if it is scalar. In the latter case, the recipe will be removed
1090 // during unrolling.
1092 LogicalAnd, // Non-poison propagating logical And.
1093 // Add an offset in bytes (second operand) to a base pointer (first
1094 // operand). Only generates scalar values (either for the first lane only or
1095 // for all lanes, depending on its uses).
1097 // Add a vector offset in bytes (second operand) to a scalar base pointer
1098 // (first operand).
1100 // Returns a scalar boolean value, which is true if any lane of its
1101 // (boolean) vector operands is true. It produces the reduced value across
1102 // all unrolled iterations. Unrolling will add all copies of its original
1103 // operand as additional operands. AnyOf is poison-safe as all operands
1104 // will be frozen.
1106 // Calculates the first active lane index of the vector predicate operands.
1107 // It produces the lane index across all unrolled iterations. Unrolling will
1108 // add all copies of its original operand as additional operands.
1109 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1110 // result even with operands that are all zeroes.
1112 // Calculates the last active lane index of the vector predicate operands.
1113 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1114 // tail-folding to extract the correct live-out value from the last active
1115 // iteration. It produces the lane index across all unrolled iterations.
1116 // Unrolling will add all copies of its original operand as additional
1117 // operands.
1119 // Returns a reversed vector for the operand.
1121
1122 // The opcodes below are used for VPInstructionWithType.
1123 //
1124 /// Scale the first operand (vector step) by the second operand
1125 /// (scalar-step). Casts both operands to the result type if needed.
1127 /// Start vector for reductions with 3 operands: the original start value,
1128 /// the identity value for the reduction and an integer indicating the
1129 /// scaling factor.
1131 // Creates a step vector starting from 0 to VF with a step of 1.
1133 /// Extracts a single lane (first operand) from a set of vector operands.
1134 /// The lane specifies an index into a vector formed by combining all vector
1135 /// operands (all operands after the first one).
1137 /// Explicit user for the resume phi of the canonical induction in the main
1138 /// VPlan, used by the epilogue vector loop.
1140 /// Returns the value for vscale.
1143 };
1144
1145 /// Returns true if this VPInstruction generates scalar values for all lanes.
1146 /// Most VPInstructions generate a single value per part, either vector or
1147 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1148 /// values per all lanes, stemming from an original ingredient. This method
1149 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1150 /// underlying ingredient.
1151 bool doesGeneratePerAllLanes() const;
1152
1153 /// Return the number of operands determined by the opcode of the
1154 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1155 /// directly by the opcode.
1156 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1157
1158private:
1159 typedef unsigned char OpcodeTy;
1160 OpcodeTy Opcode;
1161
1162 /// An optional name that can be used for the generated IR instruction.
1163 std::string Name;
1164
1165 /// Returns true if we can generate a scalar for the first lane only if
1166 /// needed.
1167 bool canGenerateScalarForFirstLane() const;
1168
1169 /// Utility methods serving execute(): generates a single vector instance of
1170 /// the modeled instruction. \returns the generated value. In some cases an
1171 /// existing value is returned rather than a generated one.
1172 Value *generate(VPTransformState &State);
1173
1174public:
1175 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1176 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1177 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1178
1179 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1180
1181 VPInstruction *clone() override {
1182 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1183 getDebugLoc(), Name);
1184 if (getUnderlyingValue())
1185 New->setUnderlyingValue(getUnderlyingInstr());
1186 return New;
1187 }
1188
1189 unsigned getOpcode() const { return Opcode; }
1190
1191 /// Generate the instruction.
1192 /// TODO: We currently execute only per-part unless a specific instance is
1193 /// provided.
1194 void execute(VPTransformState &State) override;
1195
1196 /// Return the cost of this VPInstruction.
1197 InstructionCost computeCost(ElementCount VF,
1198 VPCostContext &Ctx) const override;
1199
1200#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1201 /// Print the VPInstruction to dbgs() (for debugging).
1202 LLVM_DUMP_METHOD void dump() const;
1203#endif
1204
1205 bool hasResult() const {
1206 // CallInst may or may not have a result, depending on the called function.
1207 // Conservatively return calls have results for now.
1208 switch (getOpcode()) {
1209 case Instruction::Ret:
1210 case Instruction::Br:
1211 case Instruction::Store:
1212 case Instruction::Switch:
1213 case Instruction::IndirectBr:
1214 case Instruction::Resume:
1215 case Instruction::CatchRet:
1216 case Instruction::Unreachable:
1217 case Instruction::Fence:
1218 case Instruction::AtomicRMW:
1222 return false;
1223 default:
1224 return true;
1225 }
1226 }
1227
1228 /// Returns true if the underlying opcode may read from or write to memory.
1229 bool opcodeMayReadOrWriteFromMemory() const;
1230
1231 /// Returns true if the recipe only uses the first lane of operand \p Op.
1232 bool usesFirstLaneOnly(const VPValue *Op) const override;
1233
1234 /// Returns true if the recipe only uses the first part of operand \p Op.
1235 bool usesFirstPartOnly(const VPValue *Op) const override;
1236
1237 /// Returns true if this VPInstruction produces a scalar value from a vector,
1238 /// e.g. by performing a reduction or extracting a lane.
1239 bool isVectorToScalar() const;
1240
1241 /// Returns true if this VPInstruction's operands are single scalars and the
1242 /// result is also a single scalar.
1243 bool isSingleScalar() const;
1244
1245 /// Returns the symbolic name assigned to the VPInstruction.
1246 StringRef getName() const { return Name; }
1247
1248 /// Set the symbolic name for the VPInstruction.
1249 void setName(StringRef NewName) { Name = NewName.str(); }
1250
1251protected:
1252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1253 /// Print the VPInstruction to \p O.
1254 void printRecipe(raw_ostream &O, const Twine &Indent,
1255 VPSlotTracker &SlotTracker) const override;
1256#endif
1257};
1258
1259/// A specialization of VPInstruction augmenting it with a dedicated result
1260/// type, to be used when the opcode and operands of the VPInstruction don't
1261/// directly determine the result type. Note that there is no separate VPDef ID
1262/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1263/// distinguished purely by the opcode.
1265 /// Scalar result type produced by the recipe.
1266 Type *ResultTy;
1267
1268public:
1270 Type *ResultTy, const VPIRFlags &Flags = {},
1271 const VPIRMetadata &Metadata = {},
1273 const Twine &Name = "")
1274 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1275 ResultTy(ResultTy) {}
1276
1277 static inline bool classof(const VPRecipeBase *R) {
1278 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1279 // type information.
1280 if (R->isScalarCast())
1281 return true;
1282 auto *VPI = dyn_cast<VPInstruction>(R);
1283 if (!VPI)
1284 return false;
1285 switch (VPI->getOpcode()) {
1289 return true;
1290 default:
1291 return false;
1292 }
1293 }
1294
1295 static inline bool classof(const VPUser *R) {
1297 }
1298
1299 VPInstruction *clone() override {
1300 auto *New =
1302 *this, *this, getDebugLoc(), getName());
1303 New->setUnderlyingValue(getUnderlyingValue());
1304 return New;
1305 }
1306
1307 void execute(VPTransformState &State) override;
1308
1309 /// Return the cost of this VPInstruction.
1311 VPCostContext &Ctx) const override {
1312 // TODO: Compute accurate cost after retiring the legacy cost model.
1313 return 0;
1314 }
1315
1316 Type *getResultType() const { return ResultTy; }
1317
1318protected:
1319#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1320 /// Print the recipe.
1321 void printRecipe(raw_ostream &O, const Twine &Indent,
1322 VPSlotTracker &SlotTracker) const override;
1323#endif
1324};
1325
1326/// Helper type to provide functions to access incoming values and blocks for
1327/// phi-like recipes.
1329protected:
1330 /// Return a VPRecipeBase* to the current object.
1331 virtual const VPRecipeBase *getAsRecipe() const = 0;
1332
1333public:
1334 virtual ~VPPhiAccessors() = default;
1335
1336 /// Returns the incoming VPValue with index \p Idx.
1337 VPValue *getIncomingValue(unsigned Idx) const {
1338 return getAsRecipe()->getOperand(Idx);
1339 }
1340
1341 /// Returns the incoming block with index \p Idx.
1342 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1343
1344 /// Returns the number of incoming values, also number of incoming blocks.
1345 virtual unsigned getNumIncoming() const {
1346 return getAsRecipe()->getNumOperands();
1347 }
1348
1349 /// Returns an iterator range over the incoming values.
1351 return make_range(getAsRecipe()->op_begin(),
1352 getAsRecipe()->op_begin() + getNumIncoming());
1353 }
1354
1356 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1357
1358 /// Returns an iterator range over the incoming blocks.
1360 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1361 return getIncomingBlock(Idx);
1362 };
1363 return map_range(index_range(0, getNumIncoming()), GetBlock);
1364 }
1365
1366 /// Returns an iterator range over pairs of incoming values and corresponding
1367 /// incoming blocks.
1373
1374 /// Removes the incoming value for \p IncomingBlock, which must be a
1375 /// predecessor.
1376 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1377
1378#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1379 /// Print the recipe.
1381#endif
1382};
1383
1385 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1386 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1387
1388 static inline bool classof(const VPUser *U) {
1389 auto *VPI = dyn_cast<VPInstruction>(U);
1390 return VPI && VPI->getOpcode() == Instruction::PHI;
1391 }
1392
1393 static inline bool classof(const VPValue *V) {
1394 auto *VPI = dyn_cast<VPInstruction>(V);
1395 return VPI && VPI->getOpcode() == Instruction::PHI;
1396 }
1397
1398 static inline bool classof(const VPSingleDefRecipe *SDR) {
1399 auto *VPI = dyn_cast<VPInstruction>(SDR);
1400 return VPI && VPI->getOpcode() == Instruction::PHI;
1401 }
1402
1403 VPPhi *clone() override {
1404 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1405 PhiR->setUnderlyingValue(getUnderlyingValue());
1406 return PhiR;
1407 }
1408
1409 void execute(VPTransformState &State) override;
1410
1411protected:
1412#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1413 /// Print the recipe.
1414 void printRecipe(raw_ostream &O, const Twine &Indent,
1415 VPSlotTracker &SlotTracker) const override;
1416#endif
1417
1418 const VPRecipeBase *getAsRecipe() const override { return this; }
1419};
1420
1421/// A recipe to wrap an original IR instruction not to be modified during
1422/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1423/// Except for PHIs, VPIRInstructions cannot have any operands.
1425 Instruction &I;
1426
1427protected:
1428 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1429 /// subclasses may need to be created, e.g. VPIRPhi.
1431 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1432
1433public:
1434 ~VPIRInstruction() override = default;
1435
1436 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1437 /// VPIRInstruction.
1439
1440 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1441
1443 auto *R = create(I);
1444 for (auto *Op : operands())
1445 R->addOperand(Op);
1446 return R;
1447 }
1448
1449 void execute(VPTransformState &State) override;
1450
1451 /// Return the cost of this VPIRInstruction.
1453 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1454
1455 Instruction &getInstruction() const { return I; }
1456
1457 bool usesScalars(const VPValue *Op) const override {
1459 "Op must be an operand of the recipe");
1460 return true;
1461 }
1462
1463 bool usesFirstPartOnly(const VPValue *Op) const override {
1465 "Op must be an operand of the recipe");
1466 return true;
1467 }
1468
1469 bool usesFirstLaneOnly(const VPValue *Op) const override {
1471 "Op must be an operand of the recipe");
1472 return true;
1473 }
1474
1475 /// Update the recipe's first operand to the last lane of the last part of the
1476 /// operand using \p Builder. Must only be used for VPIRInstructions with at
1477 /// least one operand wrapping a PHINode.
1479
1480protected:
1481#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1482 /// Print the recipe.
1483 void printRecipe(raw_ostream &O, const Twine &Indent,
1484 VPSlotTracker &SlotTracker) const override;
1485#endif
1486};
1487
1488/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1489/// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1490/// allowed, and it is used to add a new incoming value for the single
1491/// predecessor VPBB.
1493 public VPPhiAccessors {
1495
1496 static inline bool classof(const VPRecipeBase *U) {
1497 auto *R = dyn_cast<VPIRInstruction>(U);
1498 return R && isa<PHINode>(R->getInstruction());
1499 }
1500
1502
1503 void execute(VPTransformState &State) override;
1504
1505protected:
1506#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1507 /// Print the recipe.
1508 void printRecipe(raw_ostream &O, const Twine &Indent,
1509 VPSlotTracker &SlotTracker) const override;
1510#endif
1511
1512 const VPRecipeBase *getAsRecipe() const override { return this; }
1513};
1514
1515/// VPWidenRecipe is a recipe for producing a widened instruction using the
1516/// opcode and operands of the recipe. This recipe covers most of the
1517/// traditional vectorization cases where each recipe transforms into a
1518/// vectorized version of itself.
1520 public VPIRMetadata {
1521 unsigned Opcode;
1522
1523public:
1525 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1526 DebugLoc DL = {})
1527 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1528 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1529 setUnderlyingValue(&I);
1530 }
1531
1532 ~VPWidenRecipe() override = default;
1533
1534 VPWidenRecipe *clone() override {
1535 return new VPWidenRecipe(*getUnderlyingInstr(), operands(), *this, *this,
1536 getDebugLoc());
1537 }
1538
1539 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1540
1541 /// Produce a widened instruction using the opcode and operands of the recipe,
1542 /// processing State.VF elements.
1543 void execute(VPTransformState &State) override;
1544
1545 /// Return the cost of this VPWidenRecipe.
1546 InstructionCost computeCost(ElementCount VF,
1547 VPCostContext &Ctx) const override;
1548
1549 unsigned getOpcode() const { return Opcode; }
1550
1551protected:
1552#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1553 /// Print the recipe.
1554 void printRecipe(raw_ostream &O, const Twine &Indent,
1555 VPSlotTracker &SlotTracker) const override;
1556#endif
1557
1558 /// Returns true if the recipe only uses the first lane of operand \p Op.
1559 bool usesFirstLaneOnly(const VPValue *Op) const override {
1561 "Op must be an operand of the recipe");
1562 return Opcode == Instruction::Select && Op == getOperand(0) &&
1563 Op->isDefinedOutsideLoopRegions();
1564 }
1565};
1566
1567/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1569 /// Cast instruction opcode.
1570 Instruction::CastOps Opcode;
1571
1572 /// Result type for the cast.
1573 Type *ResultTy;
1574
1575public:
1577 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1578 const VPIRMetadata &Metadata = {},
1580 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1581 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1582 assert(flagsValidForOpcode(Opcode) &&
1583 "Set flags not supported for the provided opcode");
1585 }
1586
1587 ~VPWidenCastRecipe() override = default;
1588
1590 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1592 *this, *this, getDebugLoc());
1593 }
1594
1595 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1596
1597 /// Produce widened copies of the cast.
1598 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1599
1600 /// Return the cost of this VPWidenCastRecipe.
1602 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1603
1604 Instruction::CastOps getOpcode() const { return Opcode; }
1605
1606 /// Returns the result type of the cast.
1607 Type *getResultType() const { return ResultTy; }
1608
1609protected:
1610#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1611 /// Print the recipe.
1612 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1613 VPSlotTracker &SlotTracker) const override;
1614#endif
1615};
1616
1617/// A recipe for widening vector intrinsics.
1619 /// ID of the vector intrinsic to widen.
1620 Intrinsic::ID VectorIntrinsicID;
1621
1622 /// Scalar return type of the intrinsic.
1623 Type *ResultTy;
1624
1625 /// True if the intrinsic may read from memory.
1626 bool MayReadFromMemory;
1627
1628 /// True if the intrinsic may write to memory.
1629 bool MayWriteToMemory;
1630
1631 /// True if the intrinsic may have side-effects.
1632 bool MayHaveSideEffects;
1633
1634public:
1636 ArrayRef<VPValue *> CallArguments, Type *Ty,
1637 const VPIRFlags &Flags = {},
1638 const VPIRMetadata &MD = {},
1640 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1641 DL),
1642 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1643 MayReadFromMemory(CI.mayReadFromMemory()),
1644 MayWriteToMemory(CI.mayWriteToMemory()),
1645 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1646 setUnderlyingValue(&CI);
1647 }
1648
1650 ArrayRef<VPValue *> CallArguments, Type *Ty,
1651 const VPIRFlags &Flags = {},
1652 const VPIRMetadata &Metadata = {},
1654 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1655 DL),
1656 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1657 ResultTy(Ty) {
1658 LLVMContext &Ctx = Ty->getContext();
1659 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1660 MemoryEffects ME = Attrs.getMemoryEffects();
1661 MayReadFromMemory = !ME.onlyWritesMemory();
1662 MayWriteToMemory = !ME.onlyReadsMemory();
1663 MayHaveSideEffects = MayWriteToMemory ||
1664 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1665 !Attrs.hasAttribute(Attribute::WillReturn);
1666 }
1667
1668 ~VPWidenIntrinsicRecipe() override = default;
1669
1671 if (Value *CI = getUnderlyingValue())
1672 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1673 operands(), ResultTy, *this, *this,
1674 getDebugLoc());
1675 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1676 *this, *this, getDebugLoc());
1677 }
1678
1679 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1680
1681 /// Produce a widened version of the vector intrinsic.
1682 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1683
1684 /// Return the cost of this vector intrinsic.
1686 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1687
1688 /// Return the ID of the intrinsic.
1689 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1690
1691 /// Return the scalar return type of the intrinsic.
1692 Type *getResultType() const { return ResultTy; }
1693
1694 /// Return the name of the intrinsic as a string.
1696
1697 /// Returns true if the intrinsic may read from memory.
1698 bool mayReadFromMemory() const { return MayReadFromMemory; }
1699
1700 /// Returns true if the intrinsic may write to memory.
1701 bool mayWriteToMemory() const { return MayWriteToMemory; }
1702
1703 /// Returns true if the intrinsic may have side-effects.
1704 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1705
1706 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1707
1708protected:
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1710 /// Print the recipe.
1711 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1712 VPSlotTracker &SlotTracker) const override;
1713#endif
1714};
1715
1716/// A recipe for widening Call instructions using library calls.
1718 public VPIRMetadata {
1719 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1720 /// between a given VF and the chosen vectorized variant, so there will be a
1721 /// different VPlan for each VF with a valid variant.
1722 Function *Variant;
1723
1724public:
1726 ArrayRef<VPValue *> CallArguments,
1727 const VPIRFlags &Flags = {},
1728 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1729 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1730 VPIRMetadata(Metadata), Variant(Variant) {
1731 setUnderlyingValue(UV);
1732 assert(
1733 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1734 "last operand must be the called function");
1735 }
1736
1737 ~VPWidenCallRecipe() override = default;
1738
1740 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1741 *this, *this, getDebugLoc());
1742 }
1743
1744 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1745
1746 /// Produce a widened version of the call instruction.
1747 void execute(VPTransformState &State) override;
1748
1749 /// Return the cost of this VPWidenCallRecipe.
1750 InstructionCost computeCost(ElementCount VF,
1751 VPCostContext &Ctx) const override;
1752
1756
1759
1760protected:
1761#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1762 /// Print the recipe.
1763 void printRecipe(raw_ostream &O, const Twine &Indent,
1764 VPSlotTracker &SlotTracker) const override;
1765#endif
1766};
1767
1768/// A recipe representing a sequence of load -> update -> store as part of
1769/// a histogram operation. This means there may be aliasing between vector
1770/// lanes, which is handled by the llvm.experimental.vector.histogram family
1771/// of intrinsics. The only update operations currently supported are
1772/// 'add' and 'sub' where the other term is loop-invariant.
1774 /// Opcode of the update operation, currently either add or sub.
1775 unsigned Opcode;
1776
1777public:
1778 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1780 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1781
1782 ~VPHistogramRecipe() override = default;
1783
1785 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1786 }
1787
1788 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1789
1790 /// Produce a vectorized histogram operation.
1791 void execute(VPTransformState &State) override;
1792
1793 /// Return the cost of this VPHistogramRecipe.
1795 VPCostContext &Ctx) const override;
1796
1797 unsigned getOpcode() const { return Opcode; }
1798
1799 /// Return the mask operand if one was provided, or a null pointer if all
1800 /// lanes should be executed unconditionally.
1801 VPValue *getMask() const {
1802 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1803 }
1804
1805protected:
1806#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1807 /// Print the recipe
1808 void printRecipe(raw_ostream &O, const Twine &Indent,
1809 VPSlotTracker &SlotTracker) const override;
1810#endif
1811};
1812
1813/// A recipe for handling GEP instructions.
1815 Type *SourceElementTy;
1816
1817 bool isPointerLoopInvariant() const {
1818 return getOperand(0)->isDefinedOutsideLoopRegions();
1819 }
1820
1821 bool isIndexLoopInvariant(unsigned I) const {
1822 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1823 }
1824
1825public:
1827 const VPIRFlags &Flags = {},
1829 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1830 SourceElementTy(GEP->getSourceElementType()) {
1831 setUnderlyingValue(GEP);
1833 (void)Metadata;
1835 assert(Metadata.empty() && "unexpected metadata on GEP");
1836 }
1837
1838 ~VPWidenGEPRecipe() override = default;
1839
1842 operands(), *this, getDebugLoc());
1843 }
1844
1845 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1846
1847 /// This recipe generates a GEP instruction.
1848 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1849
1850 /// Generate the gep nodes.
1851 void execute(VPTransformState &State) override;
1852
1853 Type *getSourceElementType() const { return SourceElementTy; }
1854
1855 /// Return the cost of this VPWidenGEPRecipe.
1857 VPCostContext &Ctx) const override {
1858 // TODO: Compute accurate cost after retiring the legacy cost model.
1859 return 0;
1860 }
1861
1862 /// Returns true if the recipe only uses the first lane of operand \p Op.
1863 bool usesFirstLaneOnly(const VPValue *Op) const override;
1864
1865protected:
1866#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1867 /// Print the recipe.
1868 void printRecipe(raw_ostream &O, const Twine &Indent,
1869 VPSlotTracker &SlotTracker) const override;
1870#endif
1871};
1872
1873/// A recipe to compute a pointer to the last element of each part of a widened
1874/// memory access for widened memory accesses of IndexedTy. Used for
1875/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1877 public VPUnrollPartAccessor<2> {
1878 Type *IndexedTy;
1879
1880 /// The constant stride of the pointer computed by this recipe, expressed in
1881 /// units of IndexedTy.
1882 int64_t Stride;
1883
1884public:
1886 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1887 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1888 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1889 IndexedTy(IndexedTy), Stride(Stride) {
1890 assert(Stride < 0 && "Stride must be negative");
1891 }
1892
1893 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1894
1896 const VPValue *getVFValue() const { return getOperand(1); }
1897
1898 void execute(VPTransformState &State) override;
1899
1900 bool usesFirstLaneOnly(const VPValue *Op) const override {
1902 "Op must be an operand of the recipe");
1903 return true;
1904 }
1905
1906 /// Return the cost of this VPVectorEndPointerRecipe.
1908 VPCostContext &Ctx) const override {
1909 // TODO: Compute accurate cost after retiring the legacy cost model.
1910 return 0;
1911 }
1912
1913 /// Returns true if the recipe only uses the first part of operand \p Op.
1914 bool usesFirstPartOnly(const VPValue *Op) const override {
1916 "Op must be an operand of the recipe");
1917 assert(getNumOperands() <= 2 && "must have at most two operands");
1918 return true;
1919 }
1920
1922 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1923 Stride, getGEPNoWrapFlags(),
1924 getDebugLoc());
1925 }
1926
1927protected:
1928#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1929 /// Print the recipe.
1930 void printRecipe(raw_ostream &O, const Twine &Indent,
1931 VPSlotTracker &SlotTracker) const override;
1932#endif
1933};
1934
1935/// A recipe to compute the pointers for widened memory accesses of \p
1936/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
1937/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
1939 Type *SourceElementTy;
1940
1941public:
1942 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1944 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, Ptr, GEPFlags, DL),
1945 SourceElementTy(SourceElementTy) {}
1946
1947 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1948
1950 return getNumOperands() == 2 ? getOperand(1) : nullptr;
1951 }
1952
1953 void execute(VPTransformState &State) override;
1954
1955 Type *getSourceElementType() const { return SourceElementTy; }
1956
1957 bool usesFirstLaneOnly(const VPValue *Op) const override {
1959 "Op must be an operand of the recipe");
1960 return true;
1961 }
1962
1963 /// Returns true if the recipe only uses the first part of operand \p Op.
1964 bool usesFirstPartOnly(const VPValue *Op) const override {
1966 "Op must be an operand of the recipe");
1967 assert(getNumOperands() <= 2 && "must have at most two operands");
1968 return true;
1969 }
1970
1972 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
1974 if (auto *Off = getOffset())
1975 Clone->addOperand(Off);
1976 return Clone;
1977 }
1978
1979 /// Return the cost of this VPVectorPointerRecipe.
1981 VPCostContext &Ctx) const override {
1982 // TODO: Compute accurate cost after retiring the legacy cost model.
1983 return 0;
1984 }
1985
1986protected:
1987#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1988 /// Print the recipe.
1989 void printRecipe(raw_ostream &O, const Twine &Indent,
1990 VPSlotTracker &SlotTracker) const override;
1991#endif
1992};
1993
1994/// A pure virtual base class for all recipes modeling header phis, including
1995/// phis for first order recurrences, pointer inductions and reductions. The
1996/// start value is the first operand of the recipe and the incoming value from
1997/// the backedge is the second operand.
1998///
1999/// Inductions are modeled using the following sub-classes:
2000/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2001/// starting at a specified value (zero for the main vector loop, the resume
2002/// value for the epilogue vector loop) and stepping by 1. The induction
2003/// controls exiting of the vector loop by comparing against the vector trip
2004/// count. Produces a single scalar PHI for the induction value per
2005/// iteration.
2006/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2007/// floating point inductions with arbitrary start and step values. Produces
2008/// a vector PHI per-part.
2009/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2010/// value of an IV with different start and step values. Produces a single
2011/// scalar value per iteration
2012/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2013/// canonical or derived induction.
2014/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2015/// pointer induction. Produces either a vector PHI per-part or scalar values
2016/// per-lane based on the canonical induction.
2018 public VPPhiAccessors {
2019protected:
2020 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2021 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2022 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2023 UnderlyingInstr, DL) {}
2024
2025 const VPRecipeBase *getAsRecipe() const override { return this; }
2026
2027public:
2028 ~VPHeaderPHIRecipe() override = default;
2029
2030 /// Method to support type inquiry through isa, cast, and dyn_cast.
2031 static inline bool classof(const VPRecipeBase *R) {
2032 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2033 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2034 }
2035 static inline bool classof(const VPValue *V) {
2036 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2037 }
2038 static inline bool classof(const VPSingleDefRecipe *R) {
2039 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2040 }
2041
2042 /// Generate the phi nodes.
2043 void execute(VPTransformState &State) override = 0;
2044
2045 /// Return the cost of this header phi recipe.
2047 VPCostContext &Ctx) const override;
2048
2049 /// Returns the start value of the phi, if one is set.
2051 return getNumOperands() == 0 ? nullptr : getOperand(0);
2052 }
2054 return getNumOperands() == 0 ? nullptr : getOperand(0);
2055 }
2056
2057 /// Update the start value of the recipe.
2059
2060 /// Returns the incoming value from the loop backedge.
2062 return getOperand(1);
2063 }
2064
2065 /// Update the incoming value from the loop backedge.
2067
2068 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2069 /// to be a recipe.
2071 return *getBackedgeValue()->getDefiningRecipe();
2072 }
2073
2074protected:
2075#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2076 /// Print the recipe.
2077 void printRecipe(raw_ostream &O, const Twine &Indent,
2078 VPSlotTracker &SlotTracker) const override = 0;
2079#endif
2080};
2081
2082/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2083/// VPWidenPointerInductionRecipe), providing shared functionality, including
2084/// retrieving the step value, induction descriptor and original phi node.
2086 const InductionDescriptor &IndDesc;
2087
2088public:
2089 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2090 VPValue *Step, const InductionDescriptor &IndDesc,
2091 DebugLoc DL)
2092 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2093 addOperand(Step);
2094 }
2095
2096 static inline bool classof(const VPRecipeBase *R) {
2097 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2098 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2099 }
2100
2101 static inline bool classof(const VPValue *V) {
2102 auto *R = V->getDefiningRecipe();
2103 return R && classof(R);
2104 }
2105
2106 static inline bool classof(const VPSingleDefRecipe *R) {
2107 return classof(static_cast<const VPRecipeBase *>(R));
2108 }
2109
2110 void execute(VPTransformState &State) override = 0;
2111
2112 /// Returns the start value of the induction.
2114
2115 /// Returns the step value of the induction.
2117 const VPValue *getStepValue() const { return getOperand(1); }
2118
2119 /// Update the step value of the recipe.
2120 void setStepValue(VPValue *V) { setOperand(1, V); }
2121
2123 const VPValue *getVFValue() const { return getOperand(2); }
2124
2125 /// Returns the number of incoming values, also number of incoming blocks.
2126 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2127 /// incoming value, its start value.
2128 unsigned getNumIncoming() const override { return 1; }
2129
2131
2132 /// Returns the induction descriptor for the recipe.
2133 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2134
2136 // TODO: All operands of base recipe must exist and be at same index in
2137 // derived recipe.
2139 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2140 }
2141
2143 // TODO: All operands of base recipe must exist and be at same index in
2144 // derived recipe.
2146 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2147 }
2148
2149 /// Returns true if the recipe only uses the first lane of operand \p Op.
2150 bool usesFirstLaneOnly(const VPValue *Op) const override {
2152 "Op must be an operand of the recipe");
2153 // The recipe creates its own wide start value, so it only requests the
2154 // first lane of the operand.
2155 // TODO: Remove once creating the start value is modeled separately.
2156 return Op == getStartValue() || Op == getStepValue();
2157 }
2158};
2159
2160/// A recipe for handling phi nodes of integer and floating-point inductions,
2161/// producing their vector values. This is an abstract recipe and must be
2162/// converted to concrete recipes before executing.
2164 public VPIRFlags {
2165 TruncInst *Trunc;
2166
2167 // If this recipe is unrolled it will have 2 additional operands.
2168 bool isUnrolled() const { return getNumOperands() == 5; }
2169
2170public:
2172 VPValue *VF, const InductionDescriptor &IndDesc,
2173 const VPIRFlags &Flags, DebugLoc DL)
2174 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2175 Step, IndDesc, DL),
2176 VPIRFlags(Flags), Trunc(nullptr) {
2177 addOperand(VF);
2178 }
2179
2181 VPValue *VF, const InductionDescriptor &IndDesc,
2182 TruncInst *Trunc, const VPIRFlags &Flags,
2183 DebugLoc DL)
2184 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2185 Step, IndDesc, DL),
2186 VPIRFlags(Flags), Trunc(Trunc) {
2187 addOperand(VF);
2189 (void)Metadata;
2190 if (Trunc)
2192 assert(Metadata.empty() && "unexpected metadata on Trunc");
2193 }
2194
2196
2202
2203 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2204
2205 void execute(VPTransformState &State) override {
2206 llvm_unreachable("cannot execute this recipe, should be expanded via "
2207 "expandVPWidenIntOrFpInductionRecipe");
2208 }
2209
2210 /// Returns the start value of the induction.
2212
2213 /// If the recipe has been unrolled, return the VPValue for the induction
2214 /// increment, otherwise return null.
2216 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2217 }
2218
2219 /// Returns the number of incoming values, also number of incoming blocks.
2220 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2221 /// incoming value, its start value.
2222 unsigned getNumIncoming() const override { return 1; }
2223
2224 /// Returns the first defined value as TruncInst, if it is one or nullptr
2225 /// otherwise.
2226 TruncInst *getTruncInst() { return Trunc; }
2227 const TruncInst *getTruncInst() const { return Trunc; }
2228
2229 /// Returns true if the induction is canonical, i.e. starting at 0 and
2230 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2231 /// same type as the canonical induction.
2232 bool isCanonical() const;
2233
2234 /// Returns the scalar type of the induction.
2236 return Trunc ? Trunc->getType() : getStartValue()->getType();
2237 }
2238
2239 /// Returns the VPValue representing the value of this induction at
2240 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2241 /// take place.
2243 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2244 }
2245
2246protected:
2247#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2248 /// Print the recipe.
2249 void printRecipe(raw_ostream &O, const Twine &Indent,
2250 VPSlotTracker &SlotTracker) const override;
2251#endif
2252};
2253
2255public:
2256 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2257 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2258 /// VF*UF.
2260 VPValue *NumUnrolledElems,
2261 const InductionDescriptor &IndDesc, DebugLoc DL)
2262 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2263 Step, IndDesc, DL) {
2264 addOperand(NumUnrolledElems);
2265 }
2266
2268
2274
2275 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2276
2277 /// Generate vector values for the pointer induction.
2278 void execute(VPTransformState &State) override {
2279 llvm_unreachable("cannot execute this recipe, should be expanded via "
2280 "expandVPWidenPointerInduction");
2281 };
2282
2283 /// Returns true if only scalar values will be generated.
2284 bool onlyScalarsGenerated(bool IsScalable);
2285
2286protected:
2287#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2288 /// Print the recipe.
2289 void printRecipe(raw_ostream &O, const Twine &Indent,
2290 VPSlotTracker &SlotTracker) const override;
2291#endif
2292};
2293
2294/// A recipe for widened phis. Incoming values are operands of the recipe and
2295/// their operand index corresponds to the incoming predecessor block. If the
2296/// recipe is placed in an entry block to a (non-replicate) region, it must have
2297/// exactly 2 incoming values, the first from the predecessor of the region and
2298/// the second from the exiting block of the region.
2300 public VPPhiAccessors {
2301 /// Name to use for the generated IR instruction for the widened phi.
2302 std::string Name;
2303
2304public:
2305 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2306 /// debug location \p DL.
2307 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2308 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2309 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2310 if (Start)
2311 addOperand(Start);
2312 }
2313
2316 getOperand(0), getDebugLoc(), Name);
2318 C->addOperand(Op);
2319 return C;
2320 }
2321
2322 ~VPWidenPHIRecipe() override = default;
2323
2324 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2325
2326 /// Generate the phi/select nodes.
2327 void execute(VPTransformState &State) override;
2328
2329protected:
2330#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2331 /// Print the recipe.
2332 void printRecipe(raw_ostream &O, const Twine &Indent,
2333 VPSlotTracker &SlotTracker) const override;
2334#endif
2335
2336 const VPRecipeBase *getAsRecipe() const override { return this; }
2337};
2338
2339/// A recipe for handling first-order recurrence phis. The start value is the
2340/// first operand of the recipe and the incoming value from the backedge is the
2341/// second operand.
2344 VPValue &BackedgeValue)
2345 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {
2346 addOperand(&BackedgeValue);
2347 }
2348
2349 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2350
2355
2356 void execute(VPTransformState &State) override;
2357
2358 /// Return the cost of this first-order recurrence phi recipe.
2360 VPCostContext &Ctx) const override;
2361
2362 /// Returns true if the recipe only uses the first lane of operand \p Op.
2363 bool usesFirstLaneOnly(const VPValue *Op) const override {
2365 "Op must be an operand of the recipe");
2366 return Op == getStartValue();
2367 }
2368
2369protected:
2370#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2371 /// Print the recipe.
2372 void printRecipe(raw_ostream &O, const Twine &Indent,
2373 VPSlotTracker &SlotTracker) const override;
2374#endif
2375};
2376
/// Possible variants of a reduction.

/// This reduction is ordered and in-loop.
struct RdxOrdered {};
/// This reduction is in-loop.
struct RdxInLoop {};
/// This reduction is unordered with the partial result scaled down by some
/// factor.
struct RdxUnordered {
  /// Factor by which the VF of the reduction's output is scaled down.
  unsigned VFScaleFactor;
};
using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;

/// Pick the ReductionStyle matching the given properties.
///
/// \param InLoop      true if the reduction is performed in-loop.
/// \param Ordered     true if the reduction is ordered; requires \p InLoop.
/// \param ScaleFactor scale factor stored in the result when the reduction is
///                    neither ordered nor in-loop; ignored otherwise.
/// \returns RdxOrdered if \p Ordered, else RdxInLoop if \p InLoop, else
///          RdxUnordered carrying \p ScaleFactor.
inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
                                        unsigned ScaleFactor) {
  assert((!Ordered || InLoop) && "Ordered implies in-loop");
  if (Ordered)
    return RdxOrdered{};
  if (InLoop)
    return RdxInLoop{};
  return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
}
2399
2400/// A recipe for handling reduction phis. The start value is the first operand
2401/// of the recipe and the incoming value from the backedge is the second
2402/// operand.
2404 public VPUnrollPartAccessor<2> {
2405 /// The recurrence kind of the reduction.
2406 const RecurKind Kind;
2407
2408 ReductionStyle Style;
2409
2410 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2411 /// patterns for argmin/argmax).
2412 /// TODO: Also support cases where the phi itself has a single use, but its
2413 /// compare has multiple uses.
2414 bool HasUsesOutsideReductionChain;
2415
2416public:
2417 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2419 VPValue &BackedgeValue, ReductionStyle Style,
2420 bool HasUsesOutsideReductionChain = false)
2421 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2422 Style(Style),
2423 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2424 addOperand(&BackedgeValue);
2425 }
2426
2427 ~VPReductionPHIRecipe() override = default;
2428
2430 return new VPReductionPHIRecipe(
2432 *getOperand(0), *getBackedgeValue(), Style,
2433 HasUsesOutsideReductionChain);
2434 }
2435
2436 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2437
2438 /// Generate the phi/select nodes.
2439 void execute(VPTransformState &State) override;
2440
2441 /// Get the factor that the VF of this recipe's output should be scaled by, or
2442 /// 1 if it isn't scaled.
2443 unsigned getVFScaleFactor() const {
2444 auto *Partial = std::get_if<RdxUnordered>(&Style);
2445 return Partial ? Partial->VFScaleFactor : 1;
2446 }
2447
2448 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2449 /// > 1.
2450 void setVFScaleFactor(unsigned ScaleFactor) {
2451 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2452 Style = RdxUnordered{ScaleFactor};
2453 }
2454
2455 /// Returns the number of incoming values, also number of incoming blocks.
2456 /// A reduction phi has two incoming values: the start value and the value
2457 /// from the loop backedge.
2458 unsigned getNumIncoming() const override { return 2; }
2459
2460 /// Returns the recurrence kind of the reduction.
2461 RecurKind getRecurrenceKind() const { return Kind; }
2462
2463 /// Returns true, if the phi is part of an ordered reduction.
2464 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2465
2466 /// Returns true if the phi is part of an in-loop reduction.
2467 bool isInLoop() const {
2468 return std::holds_alternative<RdxInLoop>(Style) ||
2469 std::holds_alternative<RdxOrdered>(Style);
2470 }
2471
2472 /// Returns true if the reduction outputs a vector with a scaled down VF.
2473 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2474
2475 /// Returns true, if the phi is part of a multi-use reduction.
2477 return HasUsesOutsideReductionChain;
2478 }
2479
2480 /// Returns true if the recipe only uses the first lane of operand \p Op.
2481 bool usesFirstLaneOnly(const VPValue *Op) const override {
2483 "Op must be an operand of the recipe");
2484 return isOrdered() || isInLoop();
2485 }
2486
2487protected:
2488#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2489 /// Print the recipe.
2490 void printRecipe(raw_ostream &O, const Twine &Indent,
2491 VPSlotTracker &SlotTracker) const override;
2492#endif
2493};
2494
2495/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2496/// instructions.
2498public:
2499 /// The blend operation is a User of the incoming values and of their
2500 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2501 /// be omitted (implied by passing an odd number of operands) in which case
2502 /// all other incoming values are merged into it.
2504 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2505 assert(Operands.size() >= 2 && "Expected at least two operands!");
2506 }
2507
2512
2513 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2514
2515 /// A normalized blend is one that has an odd number of operands, whereby the
2516 /// first operand does not have an associated mask.
2517 bool isNormalized() const { return getNumOperands() % 2; }
2518
2519 /// Return the number of incoming values, taking into account when normalized
2520 /// the first incoming value will have no mask.
2521 unsigned getNumIncomingValues() const {
2522 return (getNumOperands() + isNormalized()) / 2;
2523 }
2524
2525 /// Return incoming value number \p Idx.
2526 VPValue *getIncomingValue(unsigned Idx) const {
2527 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2528 }
2529
2530 /// Return mask number \p Idx.
2531 VPValue *getMask(unsigned Idx) const {
2532 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2533 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2534 }
2535
2536 /// Set mask number \p Idx to \p V.
2537 void setMask(unsigned Idx, VPValue *V) {
2538 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2539 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2540 }
2541
2542 void execute(VPTransformState &State) override {
2543 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2544 }
2545
2546 /// Return the cost of this VPBlendRecipe.
2547 InstructionCost computeCost(ElementCount VF,
2548 VPCostContext &Ctx) const override;
2549
2550 /// Returns true if the recipe only uses the first lane of operand \p Op.
2551 bool usesFirstLaneOnly(const VPValue *Op) const override {
2553 "Op must be an operand of the recipe");
2554 // The recursion only goes through blend recipes, so it must terminate at
2555 // a header phi at the latest.
2556 return all_of(users(),
2557 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2558 }
2559
2560protected:
2561#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2562 /// Print the recipe.
2563 void printRecipe(raw_ostream &O, const Twine &Indent,
2564 VPSlotTracker &SlotTracker) const override;
2565#endif
2566};
2567
2568/// A common base class for interleaved memory operations.
2569/// An Interleaved memory operation is a memory access method that combines
2570/// multiple strided loads/stores into a single wide load/store with shuffles.
2571/// The first operand is the start address. The optional operands are, in order,
2572/// the stored values and the mask.
2574 public VPIRMetadata {
2576
2577 /// Indicates if the interleave group is in a conditional block and requires a
2578 /// mask.
2579 bool HasMask = false;
2580
2581 /// Indicates if gaps between members of the group need to be masked out or if
2582 /// unused gaps can be loaded speculatively.
2583 bool NeedsMaskForGaps = false;
2584
2585protected:
2586 VPInterleaveBase(const unsigned char SC,
2588 ArrayRef<VPValue *> Operands,
2589 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2590 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2591 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2592 NeedsMaskForGaps(NeedsMaskForGaps) {
2593 // TODO: extend the masked interleaved-group support to reversed access.
2594 assert((!Mask || !IG->isReverse()) &&
2595 "Reversed masked interleave-group not supported.");
2596 if (StoredValues.empty()) {
2597 for (unsigned I = 0; I < IG->getFactor(); ++I)
2598 if (Instruction *Inst = IG->getMember(I)) {
2599 assert(!Inst->getType()->isVoidTy() && "must have result");
2600 new VPRecipeValue(this, Inst);
2601 }
2602 } else {
2603 for (auto *SV : StoredValues)
2604 addOperand(SV);
2605 }
2606 if (Mask) {
2607 HasMask = true;
2608 addOperand(Mask);
2609 }
2610 }
2611
2612public:
2613 VPInterleaveBase *clone() override = 0;
2614
2615 static inline bool classof(const VPRecipeBase *R) {
2616 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2617 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2618 }
2619
2620 static inline bool classof(const VPUser *U) {
2621 auto *R = dyn_cast<VPRecipeBase>(U);
2622 return R && classof(R);
2623 }
2624
2625 /// Return the address accessed by this recipe.
2626 VPValue *getAddr() const {
2627 return getOperand(0); // Address is the 1st, mandatory operand.
2628 }
2629
2630 /// Return the mask used by this recipe. Note that a full mask is represented
2631 /// by a nullptr.
2632 VPValue *getMask() const {
2633 // Mask is optional and the last operand.
2634 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2635 }
2636
2637 /// Return true if the access needs a mask because of the gaps.
2638 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2639
2641
2642 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2643
2644 void execute(VPTransformState &State) override {
2645 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2646 }
2647
2648 /// Return the cost of this recipe.
2649 InstructionCost computeCost(ElementCount VF,
2650 VPCostContext &Ctx) const override;
2651
2652 /// Returns true if the recipe only uses the first lane of operand \p Op.
2653 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2654
2655 /// Returns the number of stored operands of this interleave group. Returns 0
2656 /// for load interleave groups.
2657 virtual unsigned getNumStoreOperands() const = 0;
2658
2659 /// Return the VPValues stored by this interleave group. If it is a load
2660 /// interleave group, return an empty ArrayRef.
2662 return ArrayRef<VPValue *>(op_end() -
2663 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2665 }
2666};
2667
2668/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2669/// or stores into one wide load/store and shuffles. The first operand of a
2670/// VPInterleave recipe is the address, followed by the stored values, followed
2671/// by an optional mask.
2673public:
2675 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2676 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2677 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2678 NeedsMaskForGaps, MD, DL) {}
2679
2680 ~VPInterleaveRecipe() override = default;
2681
2685 needsMaskForGaps(), *this, getDebugLoc());
2686 }
2687
2688 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2689
2690 /// Generate the wide load or store, and shuffles.
2691 void execute(VPTransformState &State) override;
2692
2693 bool usesFirstLaneOnly(const VPValue *Op) const override {
2695 "Op must be an operand of the recipe");
2696 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2697 }
2698
2699 unsigned getNumStoreOperands() const override {
2700 return getNumOperands() - (getMask() ? 2 : 1);
2701 }
2702
2703protected:
2704#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2705 /// Print the recipe.
2706 void printRecipe(raw_ostream &O, const Twine &Indent,
2707 VPSlotTracker &SlotTracker) const override;
2708#endif
2709};
2710
2711/// A recipe for interleaved memory operations with vector-predication
2712/// intrinsics. The first operand is the address, the second operand is the
2713/// explicit vector length. Stored values and mask are optional operands.
2715public:
2717 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2718 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2719 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2720 R.getDebugLoc()) {
2721 assert(!getInterleaveGroup()->isReverse() &&
2722 "Reversed interleave-group with tail folding is not supported.");
2723 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2724 "supported for scalable vector.");
2725 }
2726
2727 ~VPInterleaveEVLRecipe() override = default;
2728
2730 llvm_unreachable("cloning not implemented yet");
2731 }
2732
2733 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2734
2735 /// The VPValue of the explicit vector length.
2736 VPValue *getEVL() const { return getOperand(1); }
2737
2738 /// Generate the wide load or store, and shuffles.
2739 void execute(VPTransformState &State) override;
2740
2741 /// The recipe only uses the first lane of the address, and EVL operand.
2742 bool usesFirstLaneOnly(const VPValue *Op) const override {
2744 "Op must be an operand of the recipe");
2745 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2746 Op == getEVL();
2747 }
2748
2749 unsigned getNumStoreOperands() const override {
2750 return getNumOperands() - (getMask() ? 3 : 2);
2751 }
2752
2753protected:
2754#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2755 /// Print the recipe.
2756 void printRecipe(raw_ostream &O, const Twine &Indent,
2757 VPSlotTracker &SlotTracker) const override;
2758#endif
2759};
2760
2761/// A recipe to represent inloop, ordered or partial reduction operations. It
2762/// performs a reduction on a vector operand into a scalar (vector in the case
2763/// of a partial reduction) value, and adds the result to a chain. The Operands
2764/// are {ChainOp, VecOp, [Condition]}.
2766
2767 /// The recurrence kind for the reduction in question.
2768 RecurKind RdxKind;
2769 /// Whether the reduction is conditional.
2770 bool IsConditional = false;
2771 ReductionStyle Style;
2772
2773protected:
2774 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2776 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2777 ReductionStyle Style, DebugLoc DL)
2778 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2779 Style(Style) {
2780 if (CondOp) {
2781 IsConditional = true;
2782 addOperand(CondOp);
2783 }
2785 }
2786
2787public:
2789 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2791 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2792 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2793 DL) {}
2794
2796 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2798 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2799 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2800 DL) {}
2801
2802 ~VPReductionRecipe() override = default;
2803
2805 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2807 getCondOp(), Style, getDebugLoc());
2808 }
2809
2810 static inline bool classof(const VPRecipeBase *R) {
2811 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2812 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2813 }
2814
2815 static inline bool classof(const VPUser *U) {
2816 auto *R = dyn_cast<VPRecipeBase>(U);
2817 return R && classof(R);
2818 }
2819
2820 static inline bool classof(const VPValue *VPV) {
2821 const VPRecipeBase *R = VPV->getDefiningRecipe();
2822 return R && classof(R);
2823 }
2824
2825 static inline bool classof(const VPSingleDefRecipe *R) {
2826 return classof(static_cast<const VPRecipeBase *>(R));
2827 }
2828
2829 /// Generate the reduction in the loop.
2830 void execute(VPTransformState &State) override;
2831
2832 /// Return the cost of VPReductionRecipe.
2833 InstructionCost computeCost(ElementCount VF,
2834 VPCostContext &Ctx) const override;
2835
2836 /// Return the recurrence kind for the in-loop reduction.
2837 RecurKind getRecurrenceKind() const { return RdxKind; }
2838 /// Return true if the in-loop reduction is ordered.
2839 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2840 /// Return true if the in-loop reduction is conditional.
2841 bool isConditional() const { return IsConditional; };
2842 /// Returns true if the reduction outputs a vector with a scaled down VF.
2843 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2844 /// Returns true if the reduction is in-loop.
2845 bool isInLoop() const {
2846 return std::holds_alternative<RdxInLoop>(Style) ||
2847 std::holds_alternative<RdxOrdered>(Style);
2848 }
2849 /// The VPValue of the scalar Chain being accumulated.
2850 VPValue *getChainOp() const { return getOperand(0); }
2851 /// The VPValue of the vector value to be reduced.
2852 VPValue *getVecOp() const { return getOperand(1); }
2853 /// The VPValue of the condition for the block.
2855 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2856 }
2857 /// Get the factor that the VF of this recipe's output should be scaled by, or
2858 /// 1 if it isn't scaled.
2859 unsigned getVFScaleFactor() const {
2860 auto *Partial = std::get_if<RdxUnordered>(&Style);
2861 return Partial ? Partial->VFScaleFactor : 1;
2862 }
2863
2864protected:
2865#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2866 /// Print the recipe.
2867 void printRecipe(raw_ostream &O, const Twine &Indent,
2868 VPSlotTracker &SlotTracker) const override;
2869#endif
2870};
2871
2872/// A recipe to represent inloop reduction operations with vector-predication
2873/// intrinsics, performing a reduction on a vector operand with the explicit
2874/// vector length (EVL) into a scalar value, and adding the result to a chain.
2875/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2877public:
2881 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2882 R.getFastMathFlags(),
2884 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2885 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2886
2887 ~VPReductionEVLRecipe() override = default;
2888
2890 llvm_unreachable("cloning not implemented yet");
2891 }
2892
2893 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2894
2895 /// Generate the reduction in the loop
2896 void execute(VPTransformState &State) override;
2897
2898 /// The VPValue of the explicit vector length.
2899 VPValue *getEVL() const { return getOperand(2); }
2900
2901 /// Returns true if the recipe only uses the first lane of operand \p Op.
2902 bool usesFirstLaneOnly(const VPValue *Op) const override {
2904 "Op must be an operand of the recipe");
2905 return Op == getEVL();
2906 }
2907
2908protected:
2909#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2910 /// Print the recipe.
2911 void printRecipe(raw_ostream &O, const Twine &Indent,
2912 VPSlotTracker &SlotTracker) const override;
2913#endif
2914};
2915
2916/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2917/// copies of the original scalar type, one per lane, instead of producing a
2918/// single copy of widened type for all lanes. If the instruction is known to be
2919/// a single scalar, only one copy, per lane zero, will be generated.
2921 public VPIRMetadata {
2922 /// Indicator if only a single replica per lane is needed.
2923 bool IsSingleScalar;
2924
2925 /// Indicator if the replicas are also predicated.
2926 bool IsPredicated;
2927
2928public:
2930 bool IsSingleScalar, VPValue *Mask = nullptr,
2931 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2932 DebugLoc DL = DebugLoc::getUnknown())
2933 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2934 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2935 IsPredicated(Mask) {
2936 setUnderlyingValue(I);
2937 if (Mask)
2938 addOperand(Mask);
2939 }
2940
2941 ~VPReplicateRecipe() override = default;
2942
2944 auto *Copy = new VPReplicateRecipe(
2945 getUnderlyingInstr(), operands(), IsSingleScalar,
2946 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2947 Copy->transferFlags(*this);
2948 return Copy;
2949 }
2950
2951 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2952
2953 /// Generate replicas of the desired Ingredient. Replicas will be generated
2954 /// for all parts and lanes unless a specific part and lane are specified in
2955 /// the \p State.
2956 void execute(VPTransformState &State) override;
2957
2958 /// Return the cost of this VPReplicateRecipe.
2959 InstructionCost computeCost(ElementCount VF,
2960 VPCostContext &Ctx) const override;
2961
/// Returns the IsSingleScalar flag this recipe was constructed with.
2962 bool isSingleScalar() const { return IsSingleScalar; }
2963
/// Returns true if the recipe is predicated, i.e. a non-null mask was passed
/// to the constructor.
2964 bool isPredicated() const { return IsPredicated; }
2965
2966 /// Returns true if the recipe only uses the first lane of operand \p Op.
2967 bool usesFirstLaneOnly(const VPValue *Op) const override {
2969 "Op must be an operand of the recipe");
2970 return isSingleScalar();
2971 }
2972
2973 /// Returns true if the recipe uses scalars of operand \p Op.
2974 bool usesScalars(const VPValue *Op) const override {
2976 "Op must be an operand of the recipe");
2977 return true;
2978 }
2979
2980 /// Returns true if the recipe is used by a widened recipe via an intervening
2981 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2982 /// in a vector.
2983 bool shouldPack() const;
2984
2985 /// Return the mask of a predicated VPReplicateRecipe.
2987 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2988 return getOperand(getNumOperands() - 1);
2989 }
2990
/// Return the opcode of the underlying instruction of this recipe.
2991 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2992
2993protected:
2994#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2995 /// Print the recipe.
2996 void printRecipe(raw_ostream &O, const Twine &Indent,
2997 VPSlotTracker &SlotTracker) const override;
2998#endif
2999};
3000
3001/// A recipe for generating conditional branches on the bits of a mask.
3003public:
3005 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3006
3009 }
3010
3011 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3012
3013 /// Generate the extraction of the appropriate bit from the block mask and the
3014 /// conditional branch.
3015 void execute(VPTransformState &State) override;
3016
3017 /// Return the cost of this VPBranchOnMaskRecipe.
3018 InstructionCost computeCost(ElementCount VF,
3019 VPCostContext &Ctx) const override;
3020
3021#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3022 /// Print the recipe.
3023 void printRecipe(raw_ostream &O, const Twine &Indent,
3024 VPSlotTracker &SlotTracker) const override {
3025 O << Indent << "BRANCH-ON-MASK ";
3027 }
3028#endif
3029
3030 /// Returns true if the recipe uses scalars of operand \p Op.
3031 bool usesScalars(const VPValue *Op) const override {
3033 "Op must be an operand of the recipe");
3034 return true;
3035 }
3036};
3037
3038/// A recipe to combine multiple recipes into a single 'expression' recipe,
3039/// which should be considered a single entity for cost-modeling and transforms.
3040/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3041/// expression recipes, before execute. The individual expression recipes are
3042/// completely disconnected from the def-use graph of other recipes not part of
3043/// the expression. Def-use edges between pairs of expression recipes remain
3044/// intact, whereas every edge between an expression recipe and a recipe outside
3045/// the expression is elevated to connect the non-expression recipe with the
3046/// VPExpressionRecipe itself.
3047class VPExpressionRecipe : public VPSingleDefRecipe {
3048 /// Recipes included in this VPExpressionRecipe. This could contain
3049 /// duplicates.
3050 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3051
3052 /// Temporary VPValues used for external operands of the expression, i.e.
3053 /// operands not defined by recipes in the expression.
3054 SmallVector<VPValue *> LiveInPlaceholders;
3055
3056 enum class ExpressionTypes {
3057 /// Represents an inloop extended reduction operation, performing a
3058 /// reduction on an extended vector operand into a scalar value, and adding
3059 /// the result to a chain.
3060 ExtendedReduction,
3061 /// Represents an inloop multiply-accumulate reduction, multiplying the
3062 /// extended vector operands, performing a reduction.add on the result, and
3063 /// adding the scalar result to a chain.
3064 ExtMulAccReduction,
3065 /// Represents an inloop multiply-accumulate reduction, multiplying the
3066 /// vector operands, performing a reduction.add on the result, and adding
3067 /// the scalar result to a chain.
3068 MulAccReduction,
3069 /// Represents an inloop multiply-accumulate reduction, multiplying the
3070 /// extended vector operands, negating the multiplication, performing a
3071 /// reduction.add on the result, and adding the scalar result to a chain.
3072 ExtNegatedMulAccReduction,
3073 };
3074
3075 /// Type of the expression.
3076 ExpressionTypes ExpressionType;
3077
3078 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3079 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3080 /// in the expression) are replaced by temporary VPValues and the original
3081 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3082 /// as needed (excluding last) to ensure they are only used by other recipes
3083 /// in the expression.
3084 VPExpressionRecipe(ExpressionTypes ExpressionType,
3085 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3086
3087public:
3089 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3091 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3094 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3095 {Ext0, Ext1, Mul, Red}) {}
3098 VPReductionRecipe *Red)
3099 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3100 {Ext0, Ext1, Mul, Sub, Red}) {
3101 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3102 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3103 "Expected an add reduction");
3104 assert(getNumOperands() >= 3 && "Expected at least three operands");
3105 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3106 assert(SubConst && SubConst->getValue() == 0 &&
3107 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3108 }
3109
3111 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3112 for (auto *R : reverse(ExpressionRecipes)) {
3113 if (ExpressionRecipesSeen.insert(R).second)
3114 delete R;
3115 }
3116 for (VPValue *T : LiveInPlaceholders)
3117 delete T;
3118 }
3119
3120 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3121
3122 VPExpressionRecipe *clone() override {
3123 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3124 SmallVector<VPSingleDefRecipe *> ClonedRecipes;
3125 for (VPSingleDefRecipe *Orig : ExpressionRecipes)
3126 ClonedRecipes.push_back(Orig->clone());
3127 for (VPSingleDefRecipe *Cloned : ClonedRecipes) {
3128 // Redirect uses of each original expression recipe to its clone.
3129 for (const auto &[Old, Replacement] : zip(ExpressionRecipes, ClonedRecipes))
3130 Cloned->replaceUsesOfWith(Old, Replacement);
3131 // Swap placeholders for the external operands; the new expression
3132 // internalizes them again when it is constructed.
3133 for (const auto &[Placeholder, OutsideOp] :
3134 zip(LiveInPlaceholders, operands()))
3135 Cloned->replaceUsesOfWith(Placeholder, OutsideOp);
3136 }
3137 return new VPExpressionRecipe(ExpressionType, ClonedRecipes); }
3138
3139 /// Return the VPValue to use to infer the result type of the recipe.
3141 unsigned OpIdx =
3142 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3143 : 1;
3144 return getOperand(getNumOperands() - OpIdx);
3145 }
3146
3147 /// Insert the recipes of the expression back into the VPlan, directly before
3148 /// the current recipe. Leaves the expression recipe empty, which must be
3149 /// removed before codegen.
3150 void decompose();
3151
/// Return the VF scale factor reported by the last recipe in
/// ExpressionRecipes when that recipe is a VPReductionRecipe, or 1 otherwise.
3152 unsigned getVFScaleFactor() const {
3153 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3154 return PR ? PR->getVFScaleFactor() : 1;
3155 }
3156
3157 /// Method for generating code, must not be called as this recipe is abstract.
3158 void execute(VPTransformState &State) override {
3159 llvm_unreachable("recipe must be removed before execute");
3160 }
3161
3163 VPCostContext &Ctx) const override;
3164
3165 /// Returns true if this expression contains recipes that may read from or
3166 /// write to memory.
3167 bool mayReadOrWriteMemory() const;
3168
3169 /// Returns true if this expression contains recipes that may have side
3170 /// effects.
3171 bool mayHaveSideEffects() const;
3172
3173 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3174 bool isSingleScalar() const;
3175
3176protected:
3177#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3178 /// Print the recipe.
3179 void printRecipe(raw_ostream &O, const Twine &Indent,
3180 VPSlotTracker &SlotTracker) const override;
3181#endif
3182};
3183
3184/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3185/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3186/// order to merge values that are set under such a branch and feed their uses.
3187/// The phi nodes can be scalar or vector depending on the users of the value.
3188/// This recipe works in concert with VPBranchOnMaskRecipe.
3190public:
3191 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3192 /// nodes after merging back from a Branch-on-Mask.
3194 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3195 ~VPPredInstPHIRecipe() override = default;
3196
3198 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3199 }
3200
3201 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3202
3203 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3204 /// retain SSA form.
3205 void execute(VPTransformState &State) override;
3206
3207 /// Return the cost of this VPPredInstPHIRecipe.
3209 VPCostContext &Ctx) const override {
3210 // TODO: Compute accurate cost after retiring the legacy cost model.
3211 return 0;
3212 }
3213
3214 /// Returns true if the recipe uses scalars of operand \p Op.
3215 bool usesScalars(const VPValue *Op) const override {
3217 "Op must be an operand of the recipe");
3218 return true;
3219 }
3220
3221protected:
3222#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3223 /// Print the recipe.
3224 void printRecipe(raw_ostream &O, const Twine &Indent,
3225 VPSlotTracker &SlotTracker) const override;
3226#endif
3227};
3228
3229/// A common base class for widening memory operations. An optional mask can be
3230/// provided as the last operand.
3232 public VPIRMetadata {
3233protected:
3235
3236 /// Alignment information for this memory access.
3238
3239 /// Whether the accessed addresses are consecutive.
3241
3242 /// Whether the consecutive accessed addresses are in reverse order.
3244
3245 /// Whether the memory access is masked.
3246 bool IsMasked = false;
3247
/// Append \p Mask as an extra operand and flag the recipe as masked. A null
/// \p Mask leaves the recipe unmasked. Must not be called once a mask has
/// already been set.
3248 void setMask(VPValue *Mask) {
3249 assert(!IsMasked && "cannot re-set mask");
3250 if (Mask) {
3251 addOperand(Mask);
3252 IsMasked = true;
3253 }
3254 }
3255
3256 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3257 std::initializer_list<VPValue *> Operands,
3258 bool Consecutive, bool Reverse,
3259 const VPIRMetadata &Metadata, DebugLoc DL)
3260 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3262 Reverse(Reverse) {
3263 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3265 "Reversed acccess without VPVectorEndPointerRecipe address?");
3266 }
3267
3268public:
3270 llvm_unreachable("cloning not supported");
3271 }
3272
/// Returns true if the VPDef ID of \p R is one of the widened memory recipe
/// IDs: VPWidenLoadSC, VPWidenStoreSC, VPWidenLoadEVLSC or VPWidenStoreEVLSC.
3273 static inline bool classof(const VPRecipeBase *R) {
3274 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3275 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3276 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3277 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3278 }
3279
/// Returns true if \p U is a VPRecipeBase that is accepted by the
/// VPRecipeBase overload of classof.
3280 static inline bool classof(const VPUser *U) {
3281 auto *R = dyn_cast<VPRecipeBase>(U);
3282 return R && classof(R);
3283 }
3284
3285 /// Return whether the loaded-from / stored-to addresses are consecutive.
3286 bool isConsecutive() const { return Consecutive; }
3287
3288 /// Return whether the consecutive loaded/stored addresses are in reverse
3289 /// order.
3290 bool isReverse() const { return Reverse; }
3291
3292 /// Return the address accessed by this recipe.
3293 VPValue *getAddr() const { return getOperand(0); }
3294
3295 /// Returns true if the recipe is masked.
3296 bool isMasked() const { return IsMasked; }
3297
3298 /// Return the mask used by this recipe. Note that a full mask is represented
3299 /// by a nullptr.
3300 VPValue *getMask() const {
3301 // Mask is optional and therefore the last operand.
3302 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3303 }
3304
3305 /// Returns the alignment of the memory access.
3306 Align getAlign() const { return Alignment; }
3307
3308 /// Generate the wide load/store.
3309 void execute(VPTransformState &State) override {
3310 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3311 }
3312
3313 /// Return the cost of this VPWidenMemoryRecipe.
3314 InstructionCost computeCost(ElementCount VF,
3315 VPCostContext &Ctx) const override;
3316
3318};
3319
3320/// A recipe for widening load operations, using the address to load from and an
3321/// optional mask.
3323 public VPRecipeValue {
3325 bool Consecutive, bool Reverse,
3326 const VPIRMetadata &Metadata, DebugLoc DL)
3327 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3328 Reverse, Metadata, DL),
3329 VPRecipeValue(this, &Load) {
3330 setMask(Mask);
3331 }
3332
3335 getMask(), Consecutive, Reverse, *this,
3336 getDebugLoc());
3337 }
3338
3339 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3340
3341 /// Generate a wide load or gather.
3342 void execute(VPTransformState &State) override;
3343
3344 /// Returns true if the recipe only uses the first lane of operand \p Op.
3345 bool usesFirstLaneOnly(const VPValue *Op) const override {
3347 "Op must be an operand of the recipe");
3348 // Widened, consecutive loads operations only demand the first lane of
3349 // their address.
3350 return Op == getAddr() && isConsecutive();
3351 }
3352
3353protected:
3354#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3355 /// Print the recipe.
3356 void printRecipe(raw_ostream &O, const Twine &Indent,
3357 VPSlotTracker &SlotTracker) const override;
3358#endif
3359};
3360
3361/// A recipe for widening load operations with vector-predication intrinsics,
3362/// using the address to load from, the explicit vector length and an optional
3363/// mask.
3365 public VPRecipeValue {
3367 VPValue *Mask)
3368 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3369 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3370 L.getDebugLoc()),
3371 VPRecipeValue(this, &getIngredient()) {
3372 setMask(Mask);
3373 }
3374
3375 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3376
3377 /// Return the EVL operand.
3378 VPValue *getEVL() const { return getOperand(1); }
3379
3380 /// Generate the wide load or gather.
3381 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3382
3383 /// Return the cost of this VPWidenLoadEVLRecipe.
3385 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3386
3387 /// Returns true if the recipe only uses the first lane of operand \p Op.
3388 bool usesFirstLaneOnly(const VPValue *Op) const override {
3390 "Op must be an operand of the recipe");
3391 // Widened loads only demand the first lane of EVL and consecutive loads
3392 // only demand the first lane of their address.
3393 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3394 }
3395
3396protected:
3397#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3398 /// Print the recipe.
3399 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3400 VPSlotTracker &SlotTracker) const override;
3401#endif
3402};
3403
3404/// A recipe for widening store operations, using the stored value, the address
3405/// to store to and an optional mask.
3407 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3408 VPValue *Mask, bool Consecutive, bool Reverse,
3409 const VPIRMetadata &Metadata, DebugLoc DL)
3410 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3411 Consecutive, Reverse, Metadata, DL) {
3412 setMask(Mask);
3413 }
3414
3420
3421 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3422
3423 /// Return the value stored by this recipe.
3424 VPValue *getStoredValue() const { return getOperand(1); }
3425
3426 /// Generate a wide store or scatter.
3427 void execute(VPTransformState &State) override;
3428
3429 /// Returns true if the recipe only uses the first lane of operand \p Op.
3430 bool usesFirstLaneOnly(const VPValue *Op) const override {
3432 "Op must be an operand of the recipe");
3433 // Widened, consecutive stores only demand the first lane of their address,
3434 // unless the same operand is also stored.
3435 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3436 }
3437
3438protected:
3439#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3440 /// Print the recipe.
3441 void printRecipe(raw_ostream &O, const Twine &Indent,
3442 VPSlotTracker &SlotTracker) const override;
3443#endif
3444};
3445
3446/// A recipe for widening store operations with vector-predication intrinsics,
3447/// using the value to store, the address to store to, the explicit vector
3448/// length and an optional mask.
3451 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3452 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3453 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3454 S.isReverse(), S, S.getDebugLoc()) {
3455 setMask(Mask);
3456 }
3457
3458 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3459
3460 /// Return the value stored by this recipe.
3461 VPValue *getStoredValue() const { return getOperand(1); }
3462
3463 /// Return the EVL operand.
3464 VPValue *getEVL() const { return getOperand(2); }
3465
3466 /// Generate the wide store or scatter.
3467 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3468
3469 /// Return the cost of this VPWidenStoreEVLRecipe.
3471 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3472
3473 /// Returns true if the recipe only uses the first lane of operand \p Op.
3474 bool usesFirstLaneOnly(const VPValue *Op) const override {
3476 "Op must be an operand of the recipe");
3477 if (Op == getEVL()) {
3478 assert(getStoredValue() != Op && "unexpected store of EVL");
3479 return true;
3480 }
3481 // Widened, consecutive memory operations only demand the first lane of
3482 // their address, unless the same operand is also stored. The latter can
3483 // happen with opaque pointers.
3484 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3485 }
3486
3487protected:
3488#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3489 /// Print the recipe.
3490 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3491 VPSlotTracker &SlotTracker) const override;
3492#endif
3493};
3494
3495/// Recipe to expand a SCEV expression.
3497 const SCEV *Expr;
3498
3499public:
3501 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3502
3503 ~VPExpandSCEVRecipe() override = default;
3504
/// Create a new VPExpandSCEVRecipe for the same SCEV expression.
3505 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3506
3507 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3508
3509 void execute(VPTransformState &State) override {
3510 llvm_unreachable("SCEV expressions must be expanded before final execute");
3511 }
3512
3513 /// Return the cost of this VPExpandSCEVRecipe.
3515 VPCostContext &Ctx) const override {
3516 // TODO: Compute accurate cost after retiring the legacy cost model.
3517 return 0;
3518 }
3519
3520 const SCEV *getSCEV() const { return Expr; }
3521
3522protected:
3523#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3524 /// Print the recipe.
3525 void printRecipe(raw_ostream &O, const Twine &Indent,
3526 VPSlotTracker &SlotTracker) const override;
3527#endif
3528};
3529
3530/// Canonical scalar induction phi of the vector loop. Starting at the specified
3531/// start value (either 0 or the resume value when vectorizing the epilogue
3532/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3533/// canonical induction variable.
3535public:
3537 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3538
3539 ~VPCanonicalIVPHIRecipe() override = default;
3540
3543 R->addOperand(getBackedgeValue());
3544 return R;
3545 }
3546
3547 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3548
3549 void execute(VPTransformState &State) override {
3550 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3551 "scalar phi recipe");
3552 }
3553
3554 /// Returns the start value of the canonical induction.
3556
3557 /// Returns the scalar type of the induction.
3558 Type *getScalarType() const { return getStartValue()->getType(); }
3559
3560 /// Returns true if the recipe only uses the first lane of operand \p Op.
3561 bool usesFirstLaneOnly(const VPValue *Op) const override {
3563 "Op must be an operand of the recipe");
3564 return true;
3565 }
3566
3567 /// Returns true if the recipe only uses the first part of operand \p Op.
3568 bool usesFirstPartOnly(const VPValue *Op) const override {
3570 "Op must be an operand of the recipe");
3571 return true;
3572 }
3573
3574 /// Return the cost of this VPCanonicalIVPHIRecipe.
3576 VPCostContext &Ctx) const override {
3577 // For now, match the behavior of the legacy cost model.
3578 return 0;
3579 }
3580
3581protected:
3582#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3583 /// Print the recipe.
3584 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3585 VPSlotTracker &SlotTracker) const override;
3586#endif
3587};
3588
3589/// A recipe for generating the active lane mask for the vector loop that is
3590/// used to predicate the vector operations.
3592public:
3594 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3595 DL) {}
3596
3597 ~VPActiveLaneMaskPHIRecipe() override = default;
3598
3601 if (getNumOperands() == 2)
3602 R->addOperand(getOperand(1));
3603 return R;
3604 }
3605
3606 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3607
3608 /// Generate the active lane mask phi of the vector loop.
3609 void execute(VPTransformState &State) override;
3610
3611protected:
3612#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3613 /// Print the recipe.
3614 void printRecipe(raw_ostream &O, const Twine &Indent,
3615 VPSlotTracker &SlotTracker) const override;
3616#endif
3617};
3618
3619/// A recipe for generating the phi node for the current index of elements,
3620/// adjusted in accordance with EVL value. It starts at the start value of the
3621/// canonical induction and gets incremented by EVL in each iteration of the
3622/// vector loop.
3624public:
3626 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3627
3628 ~VPEVLBasedIVPHIRecipe() override = default;
3629
3631 llvm_unreachable("cloning not implemented yet");
3632 }
3633
3634 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3635
3636 void execute(VPTransformState &State) override {
3637 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3638 "scalar phi recipe");
3639 }
3640
3641 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3643 VPCostContext &Ctx) const override {
3644 // For now, match the behavior of the legacy cost model.
3645 return 0;
3646 }
3647
3648 /// Returns true if the recipe only uses the first lane of operand \p Op.
3649 bool usesFirstLaneOnly(const VPValue *Op) const override {
3651 "Op must be an operand of the recipe");
3652 return true;
3653 }
3654
3655protected:
3656#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3657 /// Print the recipe.
3658 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3659 VPSlotTracker &SlotTracker) const override;
3660#endif
3661};
3662
3663/// A Recipe for widening the canonical induction variable of the vector loop.
3665 public VPUnrollPartAccessor<1> {
3666public:
3668 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3669
3670 ~VPWidenCanonicalIVRecipe() override = default;
3671
3676
3677 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3678
3679 /// Generate a canonical vector induction variable of the vector loop, with
3680 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3681 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3682 void execute(VPTransformState &State) override;
3683
3684 /// Return the cost of this VPWidenCanonicalIVRecipe.
3686 VPCostContext &Ctx) const override {
3687 // TODO: Compute accurate cost after retiring the legacy cost model.
3688 return 0;
3689 }
3690
3691protected:
3692#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3693 /// Print the recipe.
3694 void printRecipe(raw_ostream &O, const Twine &Indent,
3695 VPSlotTracker &SlotTracker) const override;
3696#endif
3697};
3698
3699/// A recipe for converting the input value \p IV value to the corresponding
3700/// value of an IV with different start and step values, using Start + IV *
3701/// Step.
3703 /// Kind of the induction.
3705 /// If not nullptr, the floating point induction binary operator. Must be set
3706 /// for floating point inductions.
3707 const FPMathOperator *FPBinOp;
3708
3709 /// Name to use for the generated IR instruction for the derived IV.
3710 std::string Name;
3711
3712public:
3714 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3715 const Twine &Name = "")
3717 IndDesc.getKind(),
3718 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3719 Start, CanonicalIV, Step, Name) {}
3720
3722 const FPMathOperator *FPBinOp, VPIRValue *Start,
3723 VPValue *IV, VPValue *Step, const Twine &Name = "")
3724 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3725 FPBinOp(FPBinOp), Name(Name.str()) {}
3726
3727 ~VPDerivedIVRecipe() override = default;
3728
3730 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3731 getStepValue());
3732 }
3733
3734 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3735
3736 /// Generate the transformed value of the induction at offset StartValue (1.
3737 /// operand) + IV (2. operand) * StepValue (3. operand).
3738 void execute(VPTransformState &State) override;
3739
3740 /// Return the cost of this VPDerivedIVRecipe.
3742 VPCostContext &Ctx) const override {
3743 // TODO: Compute accurate cost after retiring the legacy cost model.
3744 return 0;
3745 }
3746
3747 Type *getScalarType() const { return getStartValue()->getType(); }
3748
3750 VPValue *getStepValue() const { return getOperand(2); }
3751
3752 /// Returns true if the recipe only uses the first lane of operand \p Op.
3753 bool usesFirstLaneOnly(const VPValue *Op) const override {
3755 "Op must be an operand of the recipe");
3756 return true;
3757 }
3758
3759protected:
3760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3761 /// Print the recipe.
3762 void printRecipe(raw_ostream &O, const Twine &Indent,
3763 VPSlotTracker &SlotTracker) const override;
3764#endif
3765};
3766
3767/// A recipe for handling phi nodes of integer and floating-point inductions,
3768/// producing their scalar values.
3770 public VPUnrollPartAccessor<3> {
3771 Instruction::BinaryOps InductionOpcode;
3772
3773public:
3776 DebugLoc DL)
3777 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3778 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3779 InductionOpcode(Opcode) {}
3780
3782 VPValue *Step, VPValue *VF,
3785 IV, Step, VF, IndDesc.getInductionOpcode(),
3786 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3787 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3788 : FastMathFlags(),
3789 DL) {}
3790
3791 ~VPScalarIVStepsRecipe() override = default;
3792
3794 return new VPScalarIVStepsRecipe(
3795 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3797 getDebugLoc());
3798 }
3799
3800 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3801 /// this is only accurate after the VPlan has been unrolled.
3802 bool isPart0() const { return getUnrollPart(*this) == 0; }
3803
3804 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3805
3806 /// Generate the scalarized versions of the phi node as needed by their users.
3807 void execute(VPTransformState &State) override;
3808
3809 /// Return the cost of this VPScalarIVStepsRecipe.
3811 VPCostContext &Ctx) const override {
3812 // TODO: Compute accurate cost after retiring the legacy cost model.
3813 return 0;
3814 }
3815
3816 VPValue *getStepValue() const { return getOperand(1); }
3817
3818 /// Returns true if the recipe only uses the first lane of operand \p Op.
3819 bool usesFirstLaneOnly(const VPValue *Op) const override {
3821 "Op must be an operand of the recipe");
3822 return true;
3823 }
3824
3825protected:
3826#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3827 /// Print the recipe.
3828 void printRecipe(raw_ostream &O, const Twine &Indent,
3829 VPSlotTracker &SlotTracker) const override;
3830#endif
3831};
3832
3833/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3834/// types implementing VPPhiAccessors. Used by isa<> & co.
3836 static inline bool isPossible(const VPRecipeBase *f) {
3837 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3839 }
3840};
3841/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3842/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3843template <typename SrcTy>
3844struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3845
3847
3848 /// doCast is used by cast<>.
/// Dispatches on the VPDef ID of \p R to select the concrete recipe type to
/// down-cast to; IDs without an explicit case are cast to VPHeaderPHIRecipe.
3849 static inline VPPhiAccessors *doCast(SrcTy R) {
// The lambda yields a pointer-to-const; the const_cast removes the
// qualifier to produce the non-const return type.
3850 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3851 switch (R->getVPDefID()) {
3852 case VPDef::VPInstructionSC:
3853 return cast<VPPhi>(R);
3854 case VPDef::VPIRInstructionSC:
3855 return cast<VPIRPhi>(R);
3856 case VPDef::VPWidenPHISC:
3857 return cast<VPWidenPHIRecipe>(R);
3858 default:
3859 return cast<VPHeaderPHIRecipe>(R);
3860 }
3861 }());
3862 }
3863
3864 /// Used by dyn_cast<>: forwards to doCast when the cast is possible, else null.
3865 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3866 return Self::isPossible(f) ? doCast(f) : nullptr;
3869 }
3870};
3871template <>
3874template <>
3877
3878/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3879/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3880namespace detail {
3881template <typename DstTy, typename RecipeBasePtrTy>
3882static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3883 switch (R->getVPDefID()) {
3884 case VPDef::VPInstructionSC:
3885 return cast<VPInstruction>(R);
3886 case VPDef::VPWidenSC:
3887 return cast<VPWidenRecipe>(R);
3888 case VPDef::VPWidenCastSC:
3889 return cast<VPWidenCastRecipe>(R);
3890 case VPDef::VPWidenIntrinsicSC:
3892 case VPDef::VPWidenCallSC:
3893 return cast<VPWidenCallRecipe>(R);
3894 case VPDef::VPReplicateSC:
3895 return cast<VPReplicateRecipe>(R);
3896 case VPDef::VPInterleaveSC:
3897 case VPDef::VPInterleaveEVLSC:
3898 return cast<VPInterleaveBase>(R);
3899 case VPDef::VPWidenLoadSC:
3900 case VPDef::VPWidenLoadEVLSC:
3901 case VPDef::VPWidenStoreSC:
3902 case VPDef::VPWidenStoreEVLSC:
3903 return cast<VPWidenMemoryRecipe>(R);
3904 default:
3905 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3906 }
3907}
3908} // namespace detail
3909
3910/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3911/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3912template <typename DstTy, typename SrcTy>
3913struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3914 static inline bool isPossible(SrcTy R) {
3915 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3916 // also handled in castToVPIRMetadata.
3921 R);
3922 }
3923
3924 using RetTy = DstTy *;
3925
3926 /// doCast is used by cast<>.
3927 static inline RetTy doCast(SrcTy R) {
3929 }
3930
3931 /// doCastIfPossible is used by dyn_cast<>.
3932 static inline RetTy doCastIfPossible(SrcTy R) {
3933 if (!isPossible(R))
3934 return nullptr;
3935 return doCast(R);
3936 }
3937};
3938template <>
3941template <>
3944
3945/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3946/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3947/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3948class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3949 friend class VPlan;
3950
3951 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3952 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3953 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3954 if (Recipe)
3955 appendRecipe(Recipe);
3956 }
3957
3958public:
3960
3961protected:
3962 /// The VPRecipes held in the order of output instructions to generate.
3964
3965 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3966 : VPBlockBase(BlockSC, Name.str()) {}
3967
3968public:
3969 ~VPBasicBlock() override {
3970 while (!Recipes.empty())
3971 Recipes.pop_back();
3972 }
3973
3974 /// Instruction iterators...
3979
3980 //===--------------------------------------------------------------------===//
3981 /// Recipe iterator methods
3982 ///
3983 inline iterator begin() { return Recipes.begin(); }
3984 inline const_iterator begin() const { return Recipes.begin(); }
3985 inline iterator end() { return Recipes.end(); }
3986 inline const_iterator end() const { return Recipes.end(); }
3987
3988 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3989 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3990 inline reverse_iterator rend() { return Recipes.rend(); }
3991 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3992
3993 inline size_t size() const { return Recipes.size(); }
3994 inline bool empty() const { return Recipes.empty(); }
3995 inline const VPRecipeBase &front() const { return Recipes.front(); }
3996 inline VPRecipeBase &front() { return Recipes.front(); }
3997 inline const VPRecipeBase &back() const { return Recipes.back(); }
3998 inline VPRecipeBase &back() { return Recipes.back(); }
3999
4000 /// Returns a reference to the list of recipes.
4002
4003 /// Returns a pointer to a member of the recipe list.
4004 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4005 return &VPBasicBlock::Recipes;
4006 }
4007
4008 /// Method to support type inquiry through isa, cast, and dyn_cast.
4009 static inline bool classof(const VPBlockBase *V) {
4010 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4011 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4012 }
4013
4014 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4015 assert(Recipe && "No recipe to append.");
4016 assert(!Recipe->Parent && "Recipe already in VPlan");
4017 Recipe->Parent = this;
4018 Recipes.insert(InsertPt, Recipe);
4019 }
4020
4021 /// Augment the existing recipes of a VPBasicBlock with an additional
4022 /// \p Recipe as the last recipe.
4023 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4024
4025 /// The method which generates the output IR instructions that correspond to
4026 /// this VPBasicBlock, thereby "executing" the VPlan.
4027 void execute(VPTransformState *State) override;
4028
4029 /// Return the cost of this VPBasicBlock.
4030 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4031
4032 /// Return the position of the first non-phi node recipe in the block.
4033 iterator getFirstNonPhi();
4034
4035 /// Returns an iterator range over the PHI-like recipes in the block.
4039
4040 /// Split current block at \p SplitAt by inserting a new block between the
4041 /// current block and its successors and moving all recipes starting at
4042 /// SplitAt to the new block. Returns the new block.
4043 VPBasicBlock *splitAt(iterator SplitAt);
4044
4045 VPRegionBlock *getEnclosingLoopRegion();
4046 const VPRegionBlock *getEnclosingLoopRegion() const;
4047
4048#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4049 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4050 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4051 ///
4052 /// Note that the numbering is applied to the whole VPlan, so printing
4053 /// individual blocks is consistent with the whole VPlan printing.
4054 void print(raw_ostream &O, const Twine &Indent,
4055 VPSlotTracker &SlotTracker) const override;
4056 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4057#endif
4058
4059 /// If the block has multiple successors, return the branch recipe terminating
4060 /// the block. If there is no successor or only a single one, return nullptr.
4061 VPRecipeBase *getTerminator();
4062 const VPRecipeBase *getTerminator() const;
4063
4064 /// Returns true if the block is exiting its parent region.
4065 bool isExiting() const;
4066
4067 /// Clone the current block and its recipes, without updating the operands of
4068 /// the cloned recipes.
4069 VPBasicBlock *clone() override;
4070
4071 /// Returns the predecessor block at index \p Idx with the predecessors as per
4072 /// the corresponding plain CFG. If the block is an entry block to a region,
4073 /// the first predecessor is the single predecessor of a region, and the
4074 /// second predecessor is the exiting block of the region.
4075 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4076
4077protected:
4078 /// Execute the recipes in the IR basic block \p BB.
4079 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4080
4081 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4082 /// generated for this VPBB.
4083 void connectToPredecessors(VPTransformState &State);
4084
4085private:
4086 /// Create an IR BasicBlock to hold the output instructions generated by this
4087 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4088 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4089};
4090
4091inline const VPBasicBlock *
4093 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4094}
4095
4096/// A special type of VPBasicBlock that wraps an existing IR basic block.
4097/// Recipes of the block get added before the first non-phi instruction in the
4098/// wrapped block.
4099/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4100/// preheader block.
4101class VPIRBasicBlock : public VPBasicBlock {
4102 friend class VPlan;
4103
 /// The IR basic block wrapped by this VPIRBasicBlock.
4104 BasicBlock *IRBB;
4105
4106 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
 /// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4107 VPIRBasicBlock(BasicBlock *IRBB)
4108 : VPBasicBlock(VPIRBasicBlockSC,
4109 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4110 IRBB(IRBB) {}
4111
4112public:
4113 ~VPIRBasicBlock() override = default;
4114
 /// Method to support type inquiry through isa, cast, and dyn_cast. Only
 /// blocks whose block ID is VPIRBasicBlockSC are VPIRBasicBlocks.
4115 static inline bool classof(const VPBlockBase *V) {
4116 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4117 }
4118
4119 /// The method which generates the output IR instructions that correspond to
4120 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4121 void execute(VPTransformState *State) override;
4122
 /// Clone this block; the result is a VPIRBasicBlock. The definition is out
 /// of line.
4123 VPIRBasicBlock *clone() override;
4124
 /// Return the IR basic block wrapped by this VPIRBasicBlock.
4125 BasicBlock *getIRBasicBlock() const { return IRBB; }
4126};
4127
4128/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4129/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4130/// A VPRegionBlock may indicate that its contents are to be replicated several
4131/// times. This is designed to support predicated scalarization, in which a
4132/// scalar if-then code structure needs to be generated VF * UF times. Having
4133/// this replication indicator helps to keep a single model for multiple
4134/// candidate VF's. The actual replication takes place only once the desired VF
4135/// and UF have been determined.
4136class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4137 friend class VPlan;
4138
4139 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4140 VPBlockBase *Entry;
4141
4142 /// Hold the Single Exiting block of the SESE region modelled by the
4143 /// VPRegionBlock.
4144 VPBlockBase *Exiting;
4145
4146 /// An indicator whether this region is to generate multiple replicated
4147 /// instances of output IR corresponding to its VPBlockBases.
4148 bool IsReplicator;
4149
4150 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4151 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4152 const std::string &Name = "", bool IsReplicator = false)
4153 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4154 IsReplicator(IsReplicator) {
4155 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4156 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4157 Entry->setParent(this);
4158 Exiting->setParent(this);
4159 }
4160 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4161 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4162 IsReplicator(IsReplicator) {}
4163
4164public:
4165 ~VPRegionBlock() override = default;
4166
4167 /// Method to support type inquiry through isa, cast, and dyn_cast.
4168 static inline bool classof(const VPBlockBase *V) {
4169 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4170 }
4171
4172 const VPBlockBase *getEntry() const { return Entry; }
4173 VPBlockBase *getEntry() { return Entry; }
4174
4175 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4176 /// EntryBlock must have no predecessors.
4177 void setEntry(VPBlockBase *EntryBlock) {
4178 assert(EntryBlock->getPredecessors().empty() &&
4179 "Entry block cannot have predecessors.");
4180 Entry = EntryBlock;
4181 EntryBlock->setParent(this);
4182 }
4183
4184 const VPBlockBase *getExiting() const { return Exiting; }
4185 VPBlockBase *getExiting() { return Exiting; }
4186
4187 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4188 /// ExitingBlock must have no successors.
4189 void setExiting(VPBlockBase *ExitingBlock) {
4190 assert(ExitingBlock->getSuccessors().empty() &&
4191 "Exit block cannot have successors.");
4192 Exiting = ExitingBlock;
4193 ExitingBlock->setParent(this);
4194 }
4195
4196 /// Returns the pre-header VPBasicBlock of the loop region.
4198 assert(!isReplicator() && "should only get pre-header of loop regions");
4199 return getSinglePredecessor()->getExitingBasicBlock();
4200 }
4201
4202 /// An indicator whether this region is to generate multiple replicated
4203 /// instances of output IR corresponding to its VPBlockBases.
4204 bool isReplicator() const { return IsReplicator; }
4205
4206 /// The method which generates the output IR instructions that correspond to
4207 /// this VPRegionBlock, thereby "executing" the VPlan.
4208 void execute(VPTransformState *State) override;
4209
4210 /// Return the cost of this region.
4211 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4212
4213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4214 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4215 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4216 /// consecutive numbers.
4217 ///
4218 /// Note that the numbering is applied to the whole VPlan, so printing
4219 /// individual regions is consistent with the whole VPlan printing.
4220 void print(raw_ostream &O, const Twine &Indent,
4221 VPSlotTracker &SlotTracker) const override;
4222 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4223#endif
4224
4225 /// Clone all blocks in the single-entry single-exit region of the block and
4226 /// their recipes without updating the operands of the cloned recipes.
4227 VPRegionBlock *clone() override;
4228
4229 /// Remove the current region from its VPlan, connecting its predecessor to
4230 /// its entry, and its exiting block to its successor.
4231 void dissolveToCFGLoop();
4232
4233 /// Returns the canonical induction recipe of the region.
4235 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4236 if (EntryVPBB->empty()) {
4237 // VPlan native path. TODO: Unify both code paths.
4238 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4239 }
4240 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4241 }
4243 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4244 }
4245
4246 /// Return the type of the canonical IV for loop regions.
4247 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4248 const Type *getCanonicalIVType() const {
4249 return getCanonicalIV()->getScalarType();
4250 }
4251};
4252
4254 return getParent()->getParent();
4255}
4256
4258 return getParent()->getParent();
4259}
4260
4261/// VPlan models a candidate for vectorization, encoding various decisions taken
4262/// to produce efficient output IR, including which branches, basic-blocks and
4263/// output IR instructions to generate, and their cost. VPlan holds a
4264/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4265/// VPBasicBlock.
4266class VPlan {
4267 friend class VPlanPrinter;
4268 friend class VPSlotTracker;
4269
4270 /// VPBasicBlock corresponding to the original preheader. Used to place
4271 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4272 /// rest of VPlan execution.
4273 /// When this VPlan is used for the epilogue vector loop, the entry will be
4274 /// replaced by a new entry block created during skeleton creation.
4275 VPBasicBlock *Entry;
4276
4277 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4278 VPIRBasicBlock *ScalarHeader;
4279
4280 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4281 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4282 /// e.g. if the scalar epilogue always executes.
4284
4285 /// Holds the VFs applicable to this VPlan.
4287
4288 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4289 /// any UF.
4291
4292 /// Holds the name of the VPlan, for printing.
4293 std::string Name;
4294
4295 /// Represents the trip count of the original loop, for folding
4296 /// the tail.
4297 VPValue *TripCount = nullptr;
4298
4299 /// Represents the backedge taken count of the original loop, for folding
4300 /// the tail. It equals TripCount - 1.
4301 VPSymbolicValue *BackedgeTakenCount = nullptr;
4302
4303 /// Represents the vector trip count.
4304 VPSymbolicValue VectorTripCount;
4305
4306 /// Represents the vectorization factor of the loop.
4307 VPSymbolicValue VF;
4308
4309 /// Represents the loop-invariant VF * UF of the vector loop region.
4310 VPSymbolicValue VFxUF;
4311
4312 /// Contains all the external definitions created for this VPlan, as a mapping
4313 /// from IR Values to VPIRValues.
4315
4316 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4317 /// VPlan is destroyed.
4318 SmallVector<VPBlockBase *> CreatedBlocks;
4319
4320 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4321 /// wrapping the original header of the scalar loop.
4322 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4323 : Entry(Entry), ScalarHeader(ScalarHeader) {
4324 Entry->setPlan(this);
4325 assert(ScalarHeader->getNumSuccessors() == 0 &&
4326 "scalar header must be a leaf node");
4327 }
4328
4329public:
4330 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4331 /// original preheader and scalar header of \p L, to be used as entry and
4332 /// scalar header blocks of the new VPlan.
4333 VPlan(Loop *L);
4334
4335 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4336 /// wrapping \p ScalarHeaderBB as the scalar header.
4337 VPlan(BasicBlock *ScalarHeaderBB) {
4338 setEntry(createVPBasicBlock("preheader"));
4339 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4340 }
4341
4343
4345 Entry = VPBB;
4346 VPBB->setPlan(this);
4347 }
4348
4349 /// Generate the IR code for this VPlan.
4350 void execute(VPTransformState *State);
4351
4352 /// Return the cost of this plan.
4354
4355 VPBasicBlock *getEntry() { return Entry; }
4356 const VPBasicBlock *getEntry() const { return Entry; }
4357
4358 /// Returns the preheader of the vector loop region, if one exists, or null
4359 /// otherwise.
4361 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4362 return VectorRegion
4363 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4364 : nullptr;
4365 }
4366
4367 /// Returns the VPRegionBlock of the vector loop.
4370
4371 /// Returns the 'middle' block of the plan, that is the block that selects
4372 /// whether to execute the scalar tail loop or the exit block from the loop
4373 /// latch. If there is an early exit from the vector loop, the middle block
4374 /// conceptually has the early exit block as third successor, split across 2
4375 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4376 /// tail loop or the exit block. If the scalar tail loop or exit block are
4377 /// known to always execute, the middle block may branch directly to that
4378 /// block. This function cannot be called once the vector loop region has been
4379 /// removed.
4381 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4382 assert(
4383 LoopRegion &&
4384 "cannot call the function after vector loop region has been removed");
4385 // The middle block is always the last successor of the region.
4386 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4387 }
4388
4390 return const_cast<VPlan *>(this)->getMiddleBlock();
4391 }
4392
4393 /// Return the VPBasicBlock for the preheader of the scalar loop.
4395 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4396 }
4397
4398 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4399 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4400
4401 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4402 /// the original scalar loop.
4403 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4404
4405 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4406 /// exit block.
4408
4409 /// Returns true if \p VPBB is an exit block.
4410 bool isExitBlock(VPBlockBase *VPBB);
4411
4412 /// The trip count of the original loop.
4414 assert(TripCount && "trip count needs to be set before accessing it");
4415 return TripCount;
4416 }
4417
4418 /// Set the trip count assuming it is currently null; if it is not - use
4419 /// resetTripCount().
4420 void setTripCount(VPValue *NewTripCount) {
4421 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4422 TripCount = NewTripCount;
4423 }
4424
4425 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4426 /// the original trip count have been replaced.
4427 void resetTripCount(VPValue *NewTripCount) {
4428 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4429 "TripCount must be set when resetting");
4430 TripCount = NewTripCount;
4431 }
4432
4433 /// The backedge taken count of the original loop.
4435 if (!BackedgeTakenCount)
4436 BackedgeTakenCount = new VPSymbolicValue();
4437 return BackedgeTakenCount;
4438 }
4439 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4440
4441 /// The vector trip count.
4442 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4443
4444 /// Returns the VF of the vector loop region.
4445 VPValue &getVF() { return VF; };
4446 const VPValue &getVF() const { return VF; };
4447
4448 /// Returns VF * UF of the vector loop region.
4449 VPValue &getVFxUF() { return VFxUF; }
4450
4453 }
4454
4455 void addVF(ElementCount VF) { VFs.insert(VF); }
4456
4458 assert(hasVF(VF) && "Cannot set VF not already in plan");
4459 VFs.clear();
4460 VFs.insert(VF);
4461 }
4462
4463 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4464 bool hasScalableVF() const {
4465 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4466 }
4467
4468 /// Returns an iterator range over all VFs of the plan.
4471 return VFs;
4472 }
4473
4474 bool hasScalarVFOnly() const {
4475 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4476 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4477 "Plan with scalar VF should only have a single VF");
4478 return HasScalarVFOnly;
4479 }
4480
4481 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4482
4483 unsigned getUF() const {
4484 assert(UFs.size() == 1 && "Expected a single UF");
4485 return UFs[0];
4486 }
4487
4488 void setUF(unsigned UF) {
4489 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4490 UFs.clear();
4491 UFs.insert(UF);
4492 }
4493
4494 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4495 /// concrete UF.
4496 bool isUnrolled() const { return UFs.size() == 1; }
4497
4498 /// Return a string with the name of the plan and the applicable VFs and UFs.
4499 std::string getName() const;
4500
4501 void setName(const Twine &newName) { Name = newName.str(); }
4502
4503 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4504 /// yet) for \p V.
4506 assert(V && "Trying to get or add the VPIRValue of a null Value");
4507 auto [It, Inserted] = LiveIns.try_emplace(V);
4508 if (Inserted)
4509 It->second = new VPIRValue(V);
4510
4511 assert(isa<VPIRValue>(It->second) &&
4512 "Only VPIRValues should be in mapping");
4513 return It->second;
4514 }
4516 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4517 return getOrAddLiveIn(V->getValue());
4518 }
4519
4520 /// Return a VPIRValue wrapping i1 true.
4521 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4522
4523 /// Return a VPIRValue wrapping i1 false.
4524 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4525
4526 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4527 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4528 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4529 }
4530
4531 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4532 /// value.
4534 bool IsSigned = false) {
4535 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4536 }
4537
4538 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4540 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4541 }
4542
4543 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4544 /// otherwise.
4545 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4546
4547 /// Return the list of live-in VPValues available in the VPlan.
4548 auto getLiveIns() const { return LiveIns.values(); }
4549
4550#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4551 /// Print the live-ins of this VPlan to \p O.
4552 void printLiveIns(raw_ostream &O) const;
4553
4554 /// Print this VPlan to \p O.
4555 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4556
4557 /// Print this VPlan in DOT format to \p O.
4558 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4559
4560 /// Dump the plan to stderr (for debugging).
4561 LLVM_DUMP_METHOD void dump() const;
4562#endif
4563
4564 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4565 /// recipes to refer to the clones, and return it.
4567
4568 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4569 /// present. The returned block is owned by the VPlan and deleted once the
4570 /// VPlan is destroyed.
4572 VPRecipeBase *Recipe = nullptr) {
4573 auto *VPB = new VPBasicBlock(Name, Recipe);
4574 CreatedBlocks.push_back(VPB);
4575 return VPB;
4576 }
4577
4578 /// Create a new loop region with \p Name and entry and exiting blocks set
4579 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4580 /// owned by the VPlan and deleted once the VPlan is destroyed.
4581 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4582 VPBlockBase *Entry = nullptr,
4583 VPBlockBase *Exiting = nullptr) {
4584 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4585 : new VPRegionBlock(Name);
4586 CreatedBlocks.push_back(VPB);
4587 return VPB;
4588 }
4589
4590 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4591 /// returned block is owned by the VPlan and deleted once the VPlan is
4592 /// destroyed.
4594 const std::string &Name = "") {
4595 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4596 CreatedBlocks.push_back(VPB);
4597 return VPB;
4598 }
4599
4600 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4601 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4602 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4604
4605 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4606 /// instructions in \p IRBB, except its terminator which is managed by the
4607 /// successors of the block in VPlan. The returned block is owned by the VPlan
4608 /// and deleted once the VPlan is destroyed.
4610
4611 /// Returns true if the VPlan is based on a loop with an early exit. That is
4612 /// the case if the VPlan has either more than one exit block or a single exit
4613 /// block with multiple predecessors (one for the exit via the latch and one
4614 /// via the other early exit).
4615 bool hasEarlyExit() const {
4616 return count_if(ExitBlocks,
4617 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4618 1 ||
4619 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4620 }
4621
4622 /// Returns true if the scalar tail may execute after the vector loop. Note
4623 /// that this relies on unneeded branches to the scalar tail loop being
4624 /// removed.
4625 bool hasScalarTail() const {
4626 return !(!getScalarPreheader()->hasPredecessors() ||
4628 }
4629};
4630
4631#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print \p Plan to \p OS via VPlan::print and return \p OS so that stream
/// insertions can be chained.
4632inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4633 Plan.print(OS);
4634 return OS;
4635}
4636#endif
4637
4638} // end namespace llvm
4639
4640#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3599
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3593
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3948
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:3976
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4023
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:3978
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3975
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4001
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3959
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3965
iterator end()
Definition VPlan.h:3985
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3983
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:3977
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4036
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:782
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:228
~VPBasicBlock() override
Definition VPlan.h:3969
const_reverse_iterator rbegin() const
Definition VPlan.h:3989
reverse_iterator rend()
Definition VPlan.h:3990
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3963
VPRecipeBase & back()
Definition VPlan.h:3998
const VPRecipeBase & front() const
Definition VPlan.h:3995
const_iterator begin() const
Definition VPlan.h:3984
VPRecipeBase & front()
Definition VPlan.h:3996
const VPRecipeBase & back() const
Definition VPlan.h:3997
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4014
bool empty() const
Definition VPlan.h:3994
const_iterator end() const
Definition VPlan.h:3986
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4009
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4004
reverse_iterator rbegin()
Definition VPlan.h:3988
friend class VPlan
Definition VPlan.h:3949
size_t size() const
Definition VPlan.h:3993
const_reverse_iterator rend() const
Definition VPlan.h:3991
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2526
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2531
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2521
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2542
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2551
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2508
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2503
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2537
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2517
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:657
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:220
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:192
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:212
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:178
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3023
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3007
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3031
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3004
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3534
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
Definition VPlan.h:3536
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3561
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3541
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3568
VPIRValue * getStartValue() const
Returns the start value of the canonical induction.
Definition VPlan.h:3555
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3558
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3549
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3575
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:332
VPDef(const unsigned char SC)
Definition VPlanValue.h:412
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1. operand) + IV (2. operand) * StepValue (3. operand).
VPIRValue * getStartValue() const
Definition VPlan.h:3749
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3741
VPValue * getStepValue() const
Definition VPlan.h:3750
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3713
Type * getScalarType() const
Definition VPlan.h:3747
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3729
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3753
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3721
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3649
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3630
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3636
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3642
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3625
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3509
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3514
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3500
const SCEV * getSCEV() const
Definition VPlan.h:3520
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3505
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3158
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3140
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3122
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3110
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3096
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3088
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3092
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3152
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3090
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2025
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2038
static bool classof(const VPValue *V)
Definition VPlan.h:2035
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2061
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2066
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2050
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2058
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2031
VPValue * getStartValue() const
Definition VPlan.h:2053
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2070
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2020
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1784
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1801
unsigned getOpcode() const
Definition VPlan.h:1797
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1778
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4101
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:457
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4125
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4115
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4102
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:482
Class to record and manage LLVM IR flags.
Definition VPlan.h:608
FastMathFlagsTy FMFs
Definition VPlan.h:679
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:739
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:731
WrapFlagsTy WrapFlags
Definition VPlan.h:673
CmpInst::Predicate CmpPredicate
Definition VPlan.h:672
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:725
GEPNoWrapFlags GEPFlags
Definition VPlan.h:677
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:857
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:674
CmpInst::Predicate getPredicate() const
Definition VPlan.h:834
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:864
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:748
ExactFlagsTy ExactFlags
Definition VPlan.h:676
bool hasNoSignedWrap() const
Definition VPlan.h:883
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:894
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:734
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:737
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:742
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:722
bool isNonNeg() const
Definition VPlan.h:866
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:849
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:852
DisjointFlagsTy DisjointFlags
Definition VPlan.h:675
unsigned AllFlags
Definition VPlan.h:681
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:840
bool hasNoUnsignedWrap() const
Definition VPlan.h:872
FCmpFlagsTy FCmpFlags
Definition VPlan.h:680
NonNegFlagsTy NonNegFlags
Definition VPlan.h:678
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:758
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:794
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:745
VPIRFlags(Instruction &I)
Definition VPlan.h:687
Instruction & getInstruction() const
Definition VPlan.h:1455
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1463
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1442
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1469
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1457
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1430
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:980
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1016
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:988
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1000
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1269
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1310
static bool classof(const VPUser *R)
Definition VPlan.h:1295
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1277
Type * getResultType() const
Definition VPlan.h:1316
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1299
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1034
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1181
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1136
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1081
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1126
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1139
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1078
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1130
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1073
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1070
@ VScale
Returns the value for vscale.
Definition VPlan.h:1141
@ CanonicalIVIncrementForPart
Definition VPlan.h:1054
bool hasResult() const
Definition VPlan.h:1205
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1246
unsigned getOpcode() const
Definition VPlan.h:1189
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1249
friend class VPlanSlp
Definition VPlan.h:1035
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2638
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2644
static bool classof(const VPUser *U)
Definition VPlan.h:2620
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2586
Instruction * getInsertPos() const
Definition VPlan.h:2642
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2615
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2640
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2632
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2661
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2626
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2714
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2742
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2736
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2749
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2729
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2716
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2672
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2699
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2682
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2693
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2674
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1328
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1350
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1345
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4092
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1370
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1337
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1355
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1359
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3215
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3197
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3208
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3193
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4253
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:211
friend class VPDef
Definition VPlanValue.h:213
VPRecipeValue(VPDef *Def, Value *UV=nullptr)
Definition VPlan.cpp:139
friend class VPValue
Definition VPlanValue.h:212
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2899
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2878
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2902
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2889
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2464
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2450
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2429
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2443
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2476
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2458
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2467
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2481
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2418
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2473
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2461
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2765
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2774
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2841
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2810
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2825
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2852
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2854
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2837
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2788
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2839
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2795
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2843
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2850
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2845
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2804
static bool classof(const VPUser *U)
Definition VPlan.h:2815
static bool classof(const VPValue *VPV)
Definition VPlan.h:2820
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2859
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4136
const VPBlockBase * getEntry() const
Definition VPlan.h:4172
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4247
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4204
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4189
VPBlockBase * getExiting()
Definition VPlan.h:4185
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4234
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4177
const Type * getCanonicalIVType() const
Definition VPlan.h:4248
const VPBlockBase * getExiting() const
Definition VPlan.h:4184
VPBlockBase * getEntry()
Definition VPlan.h:4173
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4242
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4197
friend class VPlan
Definition VPlan.h:4137
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4168
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2921
bool isSingleScalar() const
Definition VPlan.h:2962
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2929
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2974
bool isPredicated() const
Definition VPlan.h:2964
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2943
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2967
unsigned getOpcode() const
Definition VPlan.h:2991
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:2986
VPValue * getStepValue() const
Definition VPlan.h:3816
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3810
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3781
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3802
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3793
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3774
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3819
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:594
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:597
static bool classof(const VPUser *U)
Definition VPlan.h:586
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:968
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:229
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1428
operand_range operands()
Definition VPlanValue.h:297
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:273
unsigned getNumOperands() const
Definition VPlanValue.h:267
operand_iterator op_end()
Definition VPlanValue.h:295
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:268
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:248
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:291
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:290
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:45
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:133
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:119
friend class VPRecipeValue
Definition VPlanValue.h:51
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:72
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:173
unsigned getNumUsers() const
Definition VPlanValue.h:105
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1900
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1921
const VPValue * getVFValue() const
Definition VPlan.h:1896
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1914
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1907
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1885
Type * getSourceElementType() const
Definition VPlan.h:1955
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1957
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1964
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1942
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:1980
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1971
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1718
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1725
const_operand_range args() const
Definition VPlan.h:1758
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1739
operand_range args()
Definition VPlan.h:1757
Function * getCalledScalarFunction() const
Definition VPlan.h:1753
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3685
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3672
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3667
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1568
Instruction::CastOps getOpcode() const
Definition VPlan.h:1604
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1607
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1576
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1589
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1848
Type * getSourceElementType() const
Definition VPlan.h:1853
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1856
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1840
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1826
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2150
static bool classof(const VPValue *V)
Definition VPlan.h:2101
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2120
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2135
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2113
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2128
PHINode * getPHINode() const
Definition VPlan.h:2130
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2089
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2116
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2133
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2142
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2096
const VPValue * getVFValue() const
Definition VPlan.h:2123
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2106
const VPValue * getStepValue() const
Definition VPlan.h:2117
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2211
const TruncInst * getTruncInst() const
Definition VPlan.h:2227
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2205
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2215
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2197
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2171
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2226
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2180
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2242
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2222
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2235
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1618
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1649
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1689
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1698
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1635
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1704
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1670
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1701
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1692
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3246
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3243
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3286
static bool classof(const VPUser *U)
Definition VPlan.h:3280
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3309
Instruction & Ingredient
Definition VPlan.h:3234
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3269
Instruction & getIngredient() const
Definition VPlan.h:3317
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3240
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3273
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3300
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3237
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3296
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3256
void setMask(VPValue *Mask)
Definition VPlan.h:3248
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3306
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3293
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3290
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2336
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2307
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2314
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2269
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2278
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2259
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1520
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1534
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1559
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1524
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1549
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4266
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4545
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1117
friend class VPSlotTracker
Definition VPlan.h:4268
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1093
bool hasVF(ElementCount VF) const
Definition VPlan.h:4463
LLVMContext & getContext() const
Definition VPlan.h:4451
VPBasicBlock * getEntry()
Definition VPlan.h:4355
void setName(const Twine &newName)
Definition VPlan.h:4501
bool hasScalableVF() const
Definition VPlan.h:4464
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4449
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4445
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4413
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4434
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4470
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:901
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:879
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4515
const VPValue & getVF() const
Definition VPlan.h:4446
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:909
const VPBasicBlock * getEntry() const
Definition VPlan.h:4356
friend class VPlanPrinter
Definition VPlan.h:4267
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4524
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4539
unsigned getUF() const
Definition VPlan.h:4483
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4593
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1228
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4548
bool hasUF(unsigned UF) const
Definition VPlan.h:4481
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4403
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4442
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4439
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4505
void setVF(ElementCount VF)
Definition VPlan.h:4457
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4496
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1022
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4615
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1004
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4533
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4389
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4420
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4427
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4380
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4344
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4571
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1234
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4521
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4581
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1123
bool hasScalarVFOnly() const
Definition VPlan.h:4474
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4394
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:916
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1076
void addVF(ElementCount VF)
Definition VPlan.h:4455
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4399
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1038
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4360
void setUF(unsigned UF)
Definition VPlan.h:4488
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4625
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1164
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4337
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4527
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2483
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3882
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2390
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2530
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2009
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2388
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3913
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3927
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3932
static bool isPossible(SrcTy R)
Definition VPlan.h:3914
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3844
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3865
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3846
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3849
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3836
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2382
Possible variants of a reduction.
Definition VPlan.h:2380
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2385
unsigned VFScaleFactor
Definition VPlan.h:2386
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2351
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2363
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2343
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:639
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:644
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:634
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:627
PHINode & getIRPhi()
Definition VPlan.h:1501
VPIRPhi(PHINode &PN)
Definition VPlan.h:1494
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1496
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1512
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:184
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:137
static bool classof(const VPUser *U)
Definition VPlan.h:1388
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1403
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1418
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1385
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1398
static bool classof(const VPValue *V)
Definition VPlan.h:1393
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:922
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:928
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:923
static bool classof(const VPValue *V)
Definition VPlan.h:947
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:954
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:942
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:202
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3365
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3378
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3366
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3388
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3323
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3345
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3324
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3333
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3449
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3461
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3450
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3474
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3464
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3406
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3424
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3415
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3430
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3407