LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPBlockBase to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
508// Helper macro to define common classof implementations for recipes.
// Expands to the classof overloads (for VPDef, VPValue, VPUser, VPRecipeBase
// and VPSingleDefRecipe pointers) that all reduce to comparing the recipe's
// VPDefID against the given ID. No comments may be placed inside the macro
// body because of the line continuations.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == (VPDefID);                                       \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *DefR = V->getDefiningRecipe())                                   \
      return DefR->getVPDefID() == (VPDefID);                                  \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *UserR = dyn_cast<VPRecipeBase>(U))                               \
      return UserR->getVPDefID() == (VPDefID);                                 \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == (VPDefID);                                       \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == (VPDefID);                                       \
  }
527
528 /// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
529 /// more output IR instructions that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPBlendSC:
561 case VPRecipeBase::VPPredInstPHISC:
562 case VPRecipeBase::VPCanonicalIVPHISC:
563 case VPRecipeBase::VPActiveLaneMaskPHISC:
564 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565 case VPRecipeBase::VPWidenPHISC:
566 case VPRecipeBase::VPWidenIntOrFpInductionSC:
567 case VPRecipeBase::VPWidenPointerInductionSC:
568 case VPRecipeBase::VPReductionPHISC:
569 return true;
570 case VPRecipeBase::VPBranchOnMaskSC:
571 case VPRecipeBase::VPInterleaveEVLSC:
572 case VPRecipeBase::VPInterleaveSC:
573 case VPRecipeBase::VPIRInstructionSC:
574 case VPRecipeBase::VPWidenLoadEVLSC:
575 case VPRecipeBase::VPWidenLoadSC:
576 case VPRecipeBase::VPWidenStoreEVLSC:
577 case VPRecipeBase::VPWidenStoreSC:
578 case VPRecipeBase::VPHistogramSC:
579 // TODO: Widened stores don't define a value, but widened loads do. Split
580 // the recipes to be able to make widened loads VPSingleDefRecipes.
581 return false;
582 }
583 llvm_unreachable("Unhandled VPDefID");
584 }
585
586 static inline bool classof(const VPUser *U) {
587 auto *R = dyn_cast<VPRecipeBase>(U);
588 return R && classof(R);
589 }
590
591 VPSingleDefRecipe *clone() override = 0;
592
593 /// Returns the underlying instruction.
600
601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
602 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
604#endif
605};
606
607/// Class to record and manage LLVM IR flags.
609 enum class OperationType : unsigned char {
610 Cmp,
611 FCmp,
612 OverflowingBinOp,
613 Trunc,
614 DisjointOp,
615 PossiblyExactOp,
616 GEPOp,
617 FPMathOp,
618 NonNegOp,
619 ReductionOp,
620 Other
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
struct ExactFlagsTy {
  // Use an explicitly unsigned storage type: a 1-bit bit-field of plain `char`
  // is signed on targets where `char` is signed, so storing `true` would read
  // back as -1 rather than 1. This matches the `unsigned char` bit-fields used
  // by ReductionFlagsTy and does not change the size or bit position.
  unsigned char IsExact : 1;
};
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
670 struct ReductionFlagsTy {
671 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
672 // additional kinds.
673 unsigned char Kind : 6;
674 // TODO: Derive order/in-loop from plan and remove here.
675 unsigned char IsOrdered : 1;
676 unsigned char IsInLoop : 1;
677 FastMathFlagsTy FMFs;
678
679 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
680 FastMathFlags FMFs)
681 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
682 IsInLoop(IsInLoop), FMFs(FMFs) {}
683 };
684
685 OperationType OpType;
686
687 union {
692 ExactFlagsTy ExactFlags;
695 FastMathFlagsTy FMFs;
696 FCmpFlagsTy FCmpFlags;
697 ReductionFlagsTy ReductionFlags;
698 unsigned AllFlags;
699 };
700
701public:
702 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
703
705 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
706 OpType = OperationType::FCmp;
707 FCmpFlags.Pred = FCmp->getPredicate();
708 FCmpFlags.FMFs = FCmp->getFastMathFlags();
709 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
710 OpType = OperationType::Cmp;
711 CmpPredicate = Op->getPredicate();
712 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
713 OpType = OperationType::DisjointOp;
714 DisjointFlags.IsDisjoint = Op->isDisjoint();
715 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
716 OpType = OperationType::OverflowingBinOp;
717 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
718 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
719 OpType = OperationType::Trunc;
720 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
721 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
722 OpType = OperationType::PossiblyExactOp;
723 ExactFlags.IsExact = Op->isExact();
724 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
725 OpType = OperationType::GEPOp;
726 GEPFlags = GEP->getNoWrapFlags();
727 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
728 OpType = OperationType::NonNegOp;
729 NonNegFlags.NonNeg = PNNI->hasNonNeg();
730 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
731 OpType = OperationType::FPMathOp;
732 FMFs = Op->getFastMathFlags();
733 } else {
734 OpType = OperationType::Other;
735 AllFlags = 0;
736 }
737 }
738
740 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
741
743 : OpType(OperationType::FCmp) {
744 FCmpFlags.Pred = Pred;
745 FCmpFlags.FMFs = FMFs;
746 }
747
749 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
750
752 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
753
754 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
755
757 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
758
760 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
761
763 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
764
765 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
766 : OpType(OperationType::ReductionOp),
767 ReductionFlags(Kind, IsOrdered, IsInLoop, FMFs) {}
768
770 OpType = Other.OpType;
771 AllFlags = Other.AllFlags;
772 }
773
774 /// Only keep flags also present in \p Other. \p Other must have the same
775 /// OpType as the current object.
776 void intersectFlags(const VPIRFlags &Other);
777
778 /// Drop all poison-generating flags.
780 // NOTE: This needs to be kept in-sync with
781 // Instruction::dropPoisonGeneratingFlags.
782 switch (OpType) {
783 case OperationType::OverflowingBinOp:
784 WrapFlags.HasNUW = false;
785 WrapFlags.HasNSW = false;
786 break;
787 case OperationType::Trunc:
788 TruncFlags.HasNUW = false;
789 TruncFlags.HasNSW = false;
790 break;
791 case OperationType::DisjointOp:
792 DisjointFlags.IsDisjoint = false;
793 break;
794 case OperationType::PossiblyExactOp:
795 ExactFlags.IsExact = false;
796 break;
797 case OperationType::GEPOp:
799 break;
800 case OperationType::FPMathOp:
801 case OperationType::FCmp:
802 case OperationType::ReductionOp:
803 getFMFsRef().NoNaNs = false;
804 getFMFsRef().NoInfs = false;
805 break;
806 case OperationType::NonNegOp:
807 NonNegFlags.NonNeg = false;
808 break;
809 case OperationType::Cmp:
810 case OperationType::Other:
811 break;
812 }
813 }
814
815 /// Apply the IR flags to \p I.
816 void applyFlags(Instruction &I) const {
817 switch (OpType) {
818 case OperationType::OverflowingBinOp:
819 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
820 I.setHasNoSignedWrap(WrapFlags.HasNSW);
821 break;
822 case OperationType::Trunc:
823 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
824 I.setHasNoSignedWrap(TruncFlags.HasNSW);
825 break;
826 case OperationType::DisjointOp:
827 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
828 break;
829 case OperationType::PossiblyExactOp:
830 I.setIsExact(ExactFlags.IsExact);
831 break;
832 case OperationType::GEPOp:
833 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
834 break;
835 case OperationType::FPMathOp:
836 case OperationType::FCmp: {
837 const FastMathFlagsTy &F = getFMFsRef();
838 I.setHasAllowReassoc(F.AllowReassoc);
839 I.setHasNoNaNs(F.NoNaNs);
840 I.setHasNoInfs(F.NoInfs);
841 I.setHasNoSignedZeros(F.NoSignedZeros);
842 I.setHasAllowReciprocal(F.AllowReciprocal);
843 I.setHasAllowContract(F.AllowContract);
844 I.setHasApproxFunc(F.ApproxFunc);
845 break;
846 }
847 case OperationType::NonNegOp:
848 I.setNonNeg(NonNegFlags.NonNeg);
849 break;
850 case OperationType::ReductionOp:
851 llvm_unreachable("reduction ops should not use applyFlags");
852 [[fallthrough]];
853 case OperationType::Cmp:
854 case OperationType::Other:
855 break;
856 }
857 }
858
860 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
861 "recipe doesn't have a compare predicate");
862 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
863 }
864
866 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
867 "recipe doesn't have a compare predicate");
868 if (OpType == OperationType::FCmp)
869 FCmpFlags.Pred = Pred;
870 else
871 CmpPredicate = Pred;
872 }
873
875
876 /// Returns true if the recipe has a comparison predicate.
877 bool hasPredicate() const {
878 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
879 }
880
881 /// Returns true if the recipe has fast-math flags.
882 bool hasFastMathFlags() const {
883 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
884 OpType == OperationType::ReductionOp;
885 }
886
888
889 /// Returns true if the recipe has non-negative flag.
890 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
891
892 bool isNonNeg() const {
893 assert(OpType == OperationType::NonNegOp &&
894 "recipe doesn't have a NNEG flag");
895 return NonNegFlags.NonNeg;
896 }
897
898 bool hasNoUnsignedWrap() const {
899 switch (OpType) {
900 case OperationType::OverflowingBinOp:
901 return WrapFlags.HasNUW;
902 case OperationType::Trunc:
903 return TruncFlags.HasNUW;
904 default:
905 llvm_unreachable("recipe doesn't have a NUW flag");
906 }
907 }
908
909 bool hasNoSignedWrap() const {
910 switch (OpType) {
911 case OperationType::OverflowingBinOp:
912 return WrapFlags.HasNSW;
913 case OperationType::Trunc:
914 return TruncFlags.HasNSW;
915 default:
916 llvm_unreachable("recipe doesn't have a NSW flag");
917 }
918 }
919
920 bool isDisjoint() const {
921 assert(OpType == OperationType::DisjointOp &&
922 "recipe cannot have a disjoing flag");
923 return DisjointFlags.IsDisjoint;
924 }
925
927 assert(OpType == OperationType::ReductionOp &&
928 "recipe doesn't have reduction flags");
929 return static_cast<RecurKind>(ReductionFlags.Kind);
930 }
931
932 bool isReductionOrdered() const {
933 assert(OpType == OperationType::ReductionOp &&
934 "recipe doesn't have reduction flags");
935 return ReductionFlags.IsOrdered;
936 }
937
938 bool isReductionInLoop() const {
939 assert(OpType == OperationType::ReductionOp &&
940 "recipe doesn't have reduction flags");
941 return ReductionFlags.IsInLoop;
942 }
943
944private:
945 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
946 FastMathFlagsTy &getFMFsRef() {
947 if (OpType == OperationType::FCmp)
948 return FCmpFlags.FMFs;
949 if (OpType == OperationType::ReductionOp)
950 return ReductionFlags.FMFs;
951 return FMFs;
952 }
953 const FastMathFlagsTy &getFMFsRef() const {
954 if (OpType == OperationType::FCmp)
955 return FCmpFlags.FMFs;
956 if (OpType == OperationType::ReductionOp)
957 return ReductionFlags.FMFs;
958 return FMFs;
959 }
960
961public:
962#if !defined(NDEBUG)
963 /// Returns true if the set flags are valid for \p Opcode.
964 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
965#endif
966
967#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
968 void printFlags(raw_ostream &O) const;
969#endif
970};
971
972/// A pure-virtual common base class for recipes defining a single VPValue and
973/// using IR flags.
975 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
976 const VPIRFlags &Flags,
978 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
979
/// Returns true if the VPDef ID of \p R is one of the recipe kinds listed
/// below, i.e. the kinds this class accepts for isa/cast/dyn_cast.
980 static inline bool classof(const VPRecipeBase *R) {
981 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
982 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
983 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
984 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
985 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
986 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
987 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
988 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
989 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
990 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
991 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
992 }
993
/// Returns true if the user \p U is a VPRecipeBase that the VPRecipeBase
/// overload of classof accepts.
994 static inline bool classof(const VPUser *U) {
995 auto *R = dyn_cast<VPRecipeBase>(U);
996 return R && classof(R);
997 }
998
/// Returns true if \p V is defined by a recipe that the VPRecipeBase overload
/// of classof accepts. \p V itself is dereferenced and must not be null;
/// dyn_cast_or_null is used on the result of getDefiningRecipe(), presumably
/// because some values have no defining recipe — confirm.
999 static inline bool classof(const VPValue *V) {
1000 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
1001 return R && classof(R);
1002 }
1003
1005
/// Returns true if the single-def recipe \p U is a VPRecipeBase that the
/// VPRecipeBase overload of classof accepts.
1006 static inline bool classof(const VPSingleDefRecipe *U) {
1007 auto *R = dyn_cast<VPRecipeBase>(U);
1008 return R && classof(R);
1009 }
1010
1011 void execute(VPTransformState &State) override = 0;
1012
1013 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1015 VPCostContext &Ctx) const;
1016};
1017
1018 /// Helper to access the operand that contains the unroll part for this recipe
1019 /// after unrolling. Both accessors are protected and only declared here;
/// their definitions are not part of this header excerpt.
/// \tparam PartOpIdx presumably the index of the operand that carries the
/// unroll part — confirm against the out-of-line definitions.
1020 template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
1021protected:
1022 /// Return the VPValue operand containing the unroll part or null if there is
1023 /// no such operand. \p U is the user whose operands are inspected.
1024 VPValue *getUnrollPartOperand(const VPUser &U) const;
1025
1026 /// Return the unroll part for the user \p U.
1027 unsigned getUnrollPart(const VPUser &U) const;
1028};
1029
1030/// Helper to manage IR metadata for recipes. It filters out metadata that
1031/// cannot be propagated.
1034
1035public:
1036 VPIRMetadata() = default;
1037
1038 /// Adds metadata that can be preserved from the original instruction
1039 /// \p I.
1041
1042 /// Copy constructor for cloning.
1044
1046
1047 /// Add all metadata to \p I.
1048 void applyMetadata(Instruction &I) const;
1049
1050 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1051 /// already exists, it will be replaced. Otherwise, it will be added.
1052 void setMetadata(unsigned Kind, MDNode *Node) {
// Linear search for an existing entry whose kind matches.
1053 auto It =
1054 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1055 return P.first == Kind;
1056 });
// Overwrite in place rather than appending a second entry for the same
// kind; only append when no entry for Kind was found.
1057 if (It != Metadata.end())
1058 It->second = Node;
1059 else
1060 Metadata.emplace_back(Kind, Node);
1061 }
1062
1063 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1064 /// nodes that are common to both.
1065 void intersect(const VPIRMetadata &MD);
1066
1067 /// Get metadata of kind \p Kind. Returns nullptr if not found. The lookup
/// is a linear search that returns the first entry whose kind matches.
1068 MDNode *getMetadata(unsigned Kind) const {
1069 auto It =
1070 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1071 return It != Metadata.end() ? It->second : nullptr;
1072 }
1073
1074#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1075 /// Print metadata with node IDs.
1076 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1077#endif
1078};
1079
1080/// This is a concrete Recipe that models a single VPlan-level instruction.
1081 /// While, like any Recipe, it may generate a sequence of IR instructions when
1082/// executed, these instructions would always form a single-def expression as
1083/// the VPInstruction is also a single def-use vertex.
1085 public VPIRMetadata,
1086 public VPUnrollPartAccessor<1> {
1087 friend class VPlanSlp;
1088
1089public:
1090 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1091 enum {
1093 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1094 // values of a first-order recurrence.
1098 // Creates a mask where each lane is active (true) whilst the current
1099 // counter (first operand + index) is less than the second operand. i.e.
1100 // mask[i] = icmp ult (op0 + i), op1
1101 // The size of the mask returned is VF * Multiplier (UF, third op).
1105 // Increment the canonical IV separately for each unrolled part.
1107 // Abstract instruction that compares two values and branches. This is
1108 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1111 // Branch with 2 boolean condition operands and 3 successors. If condition
1112 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1113 // successor 1; otherwise branches to successor 2. Expanded after region
1114 // dissolution into: (1) an OR of the two conditions branching to
1115 // middle.split or successor 2, and (2) middle.split branching to successor
1116 // 0 or successor 1 based on condition 0.
1119 /// Given operands of (the same) struct type, creates a struct of fixed-
1120 /// width vectors each containing a struct field of all operands. The
1121 /// number of operands matches the element count of every vector.
1123 /// Creates a fixed-width vector containing all operands. The number of
1124 /// operands matches the vector element count.
1126 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1127 /// abstract VPInstruction whose single defined VPValue represents VF
1128 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1129 /// VPInstructions.
1131 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1132 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1136 // Extracts the last part of its operand. Removed during unrolling.
1138 // Extracts the last lane of its vector operand, per part.
1140 // Extracts the second-to-last lane from its operand or the second-to-last
1141 // part if it is scalar. In the latter case, the recipe will be removed
1142 // during unrolling.
1144 LogicalAnd, // Non-poison propagating logical And.
1145 // Add an offset in bytes (second operand) to a base pointer (first
1146 // operand). Only generates scalar values (either for the first lane only or
1147 // for all lanes, depending on its uses).
1149 // Add a vector offset in bytes (second operand) to a scalar base pointer
1150 // (first operand).
1152 // Returns a scalar boolean value, which is true if any lane of its
1153 // (boolean) vector operands is true. It produces the reduced value across
1154 // all unrolled iterations. Unrolling will add all copies of its original
1155 // operand as additional operands. AnyOf is poison-safe as all operands
1156 // will be frozen.
1158 // Calculates the first active lane index of the vector predicate operands.
1159 // It produces the lane index across all unrolled iterations. Unrolling will
1160 // add all copies of its original operand as additional operands.
1161 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1162 // result even with operands that are all zeroes.
1164 // Calculates the last active lane index of the vector predicate operands.
1165 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1166 // tail-folding to extract the correct live-out value from the last active
1167 // iteration. It produces the lane index across all unrolled iterations.
1168 // Unrolling will add all copies of its original operand as additional
1169 // operands.
1171 // Returns a reversed vector for the operand.
1173
1174 // The opcodes below are used for VPInstructionWithType.
1175 //
1176 /// Scale the first operand (vector step) by the second operand
1177 /// (scalar-step). Casts both operands to the result type if needed.
1179 /// Start vector for reductions with 3 operands: the original start value,
1180 /// the identity value for the reduction and an integer indicating the
1181 /// scaling factor.
1183 // Creates a step vector starting from 0 to VF with a step of 1.
1185 /// Extracts a single lane (first operand) from a set of vector operands.
1186 /// The lane specifies an index into a vector formed by combining all vector
1187 /// operands (all operands after the first one).
1189 /// Explicit user for the resume phi of the canonical induction in the main
1190 /// VPlan, used by the epilogue vector loop.
1192 /// Extracts the lane from the first operand corresponding to the last
1193 /// active (non-zero) lane in the mask (second operand), or if no lanes
1194 /// were active in the mask, returns the default value (third operand).
1196
1197 /// Returns the value for vscale.
1200 };
1201
1202 /// Returns true if this VPInstruction generates scalar values for all lanes.
1203 /// Most VPInstructions generate a single value per part, either vector or
1204 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1205 /// values per all lanes, stemming from an original ingredient. This method
1206 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1207 /// underlying ingredient.
1208 bool doesGeneratePerAllLanes() const;
1209
1210 /// Return the number of operands determined by the opcode of the
1211 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1212 /// directly by the opcode.
1213 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1214
1215private:
1216 typedef unsigned char OpcodeTy;
1217 OpcodeTy Opcode;
1218
1219 /// An optional name that can be used for the generated IR instruction.
1220 std::string Name;
1221
1222 /// Returns true if we can generate a scalar for the first lane only if
1223 /// needed.
1224 bool canGenerateScalarForFirstLane() const;
1225
1226 /// Utility methods serving execute(): generates a single vector instance of
1227 /// the modeled instruction. \returns the generated value. In some cases an
1228 /// existing value is returned rather than a generated one.
1229 Value *generate(VPTransformState &State);
1230
1231public:
1232 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1233 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1234 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1235
1236 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1237
/// Create a new VPInstruction with the same opcode, operands, debug location
/// and name as this one. `*this` is passed twice on purpose: once bound to
/// the constructor's VPIRFlags parameter and once to its VPIRMetadata
/// parameter, so flags and metadata are copied from this recipe.
1238 VPInstruction *clone() override {
1239 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1240 getDebugLoc(), Name);
// Only propagate the underlying value when one is set.
1241 if (getUnderlyingValue())
1242 New->setUnderlyingValue(getUnderlyingInstr());
1243 return New;
1244 }
1245
1246 unsigned getOpcode() const { return Opcode; }
1247
1248 /// Generate the instruction.
1249 /// TODO: We currently execute only per-part unless a specific instance is
1250 /// provided.
1251 void execute(VPTransformState &State) override;
1252
1253 /// Return the cost of this VPInstruction.
1254 InstructionCost computeCost(ElementCount VF,
1255 VPCostContext &Ctx) const override;
1256
1257#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1258 /// Print the VPInstruction to dbgs() (for debugging).
1259 LLVM_DUMP_METHOD void dump() const;
1260#endif
1261
/// Returns true if this VPInstruction is treated as defining a result,
/// judged purely by its opcode: the opcodes listed in the switch return
/// false, every other opcode returns true.
/// NOTE(review): the embedded listing numbering jumps from 1275 to 1279
/// inside the switch, so additional case labels may exist in the real
/// header — confirm before relying on this list.
1262 bool hasResult() const {
1263 // CallInst may or may not have a result, depending on the called function.
1264 // Conservatively assume that calls have results for now.
1265 switch (getOpcode()) {
1266 case Instruction::Ret:
1267 case Instruction::Br:
1268 case Instruction::Store:
1269 case Instruction::Switch:
1270 case Instruction::IndirectBr:
1271 case Instruction::Resume:
1272 case Instruction::CatchRet:
1273 case Instruction::Unreachable:
1274 case Instruction::Fence:
1275 case Instruction::AtomicRMW:
1279 return false;
1280 default:
1281 return true;
1282 }
1283 }
1284
1285 /// Returns true if the underlying opcode may read from or write to memory.
1286 bool opcodeMayReadOrWriteFromMemory() const;
1287
1288 /// Returns true if the recipe only uses the first lane of operand \p Op.
1289 bool usesFirstLaneOnly(const VPValue *Op) const override;
1290
1291 /// Returns true if the recipe only uses the first part of operand \p Op.
1292 bool usesFirstPartOnly(const VPValue *Op) const override;
1293
1294 /// Returns true if this VPInstruction produces a scalar value from a vector,
1295 /// e.g. by performing a reduction or extracting a lane.
1296 bool isVectorToScalar() const;
1297
1298 /// Returns true if this VPInstruction's operands are single scalars and the
1299 /// result is also a single scalar.
1300 bool isSingleScalar() const;
1301
1302 /// Returns the symbolic name assigned to the VPInstruction.
1303 StringRef getName() const { return Name; }
1304
1305 /// Set the symbolic name for the VPInstruction.
1306 void setName(StringRef NewName) { Name = NewName.str(); }
1307
1308protected:
1309#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1310 /// Print the VPInstruction to \p O.
1311 void printRecipe(raw_ostream &O, const Twine &Indent,
1312 VPSlotTracker &SlotTracker) const override;
1313#endif
1314};
1315
1316/// A specialization of VPInstruction augmenting it with a dedicated result
1317/// type, to be used when the opcode and operands of the VPInstruction don't
1318/// directly determine the result type. Note that there is no separate VPDef ID
1319/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1320/// distinguished purely by the opcode.
1322 /// Scalar result type produced by the recipe.
1323 Type *ResultTy;
1324
1325public:
1327 Type *ResultTy, const VPIRFlags &Flags = {},
1328 const VPIRMetadata &Metadata = {},
1330 const Twine &Name = "")
1331 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1332 ResultTy(ResultTy) {}
1333
/// Returns true if \p R is accepted as a VPInstructionWithType: either
/// R->isScalarCast() holds, or \p R is a VPInstruction whose opcode is
/// selected by the switch below.
/// NOTE(review): the embedded listing numbering jumps from 1342 to 1346
/// inside the switch, so the case labels that lead to `return true` are not
/// visible here — confirm against the real header.
1334 static inline bool classof(const VPRecipeBase *R) {
1335 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1336 // type information.
1337 if (R->isScalarCast())
1338 return true;
1339 auto *VPI = dyn_cast<VPInstruction>(R);
1340 if (!VPI)
1341 return false;
1342 switch (VPI->getOpcode()) {
1346 return true;
1347 default:
1348 return false;
1349 }
1350 }
1351
1352 static inline bool classof(const VPUser *R) {
1354 }
1355
1356 VPInstruction *clone() override {
1357 auto *New =
1359 *this, *this, getDebugLoc(), getName());
1360 New->setUnderlyingValue(getUnderlyingValue());
1361 return New;
1362 }
1363
1364 void execute(VPTransformState &State) override;
1365
1366 /// Return the cost of this VPInstruction.
1368 VPCostContext &Ctx) const override {
1369 // TODO: Compute accurate cost after retiring the legacy cost model.
1370 return 0;
1371 }
1372
1373 Type *getResultType() const { return ResultTy; }
1374
1375protected:
1376#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1377 /// Print the recipe.
1378 void printRecipe(raw_ostream &O, const Twine &Indent,
1379 VPSlotTracker &SlotTracker) const override;
1380#endif
1381};
1382
1383/// Helper type to provide functions to access incoming values and blocks for
1384/// phi-like recipes.
1386protected:
1387 /// Return a VPRecipeBase* to the current object.
1388 virtual const VPRecipeBase *getAsRecipe() const = 0;
1389
1390public:
1391 virtual ~VPPhiAccessors() = default;
1392
1393 /// Returns the incoming VPValue with index \p Idx.
1394 VPValue *getIncomingValue(unsigned Idx) const {
1395 return getAsRecipe()->getOperand(Idx);
1396 }
1397
1398 /// Returns the incoming block with index \p Idx.
1399 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1400
1401 /// Returns the number of incoming values, also number of incoming blocks.
1402 virtual unsigned getNumIncoming() const {
1403 return getAsRecipe()->getNumOperands();
1404 }
1405
1406 /// Returns an iterator range over the incoming values.
1408 return make_range(getAsRecipe()->op_begin(),
1409 getAsRecipe()->op_begin() + getNumIncoming());
1410 }
1411
1413 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1414
1415 /// Returns an iterator range over the incoming blocks.
1417 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1418 return getIncomingBlock(Idx);
1419 };
1420 return map_range(index_range(0, getNumIncoming()), GetBlock);
1421 }
1422
1423 /// Returns an iterator range over pairs of incoming values and corresponding
1424 /// incoming blocks.
1430
1431 /// Removes the incoming value for \p IncomingBlock, which must be a
1432 /// predecessor.
1433 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1434
1435#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1436 /// Print the recipe.
1438#endif
1439};
1440
1442 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1443 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1444
/// Returns true if the user \p U is a VPInstruction whose opcode is
/// Instruction::PHI.
1445 static inline bool classof(const VPUser *U) {
1446 auto *VPI = dyn_cast<VPInstruction>(U);
1447 return VPI && VPI->getOpcode() == Instruction::PHI;
1448 }
1449
/// Returns true if the value \p V is a VPInstruction whose opcode is
/// Instruction::PHI.
1450 static inline bool classof(const VPValue *V) {
1451 auto *VPI = dyn_cast<VPInstruction>(V);
1452 return VPI && VPI->getOpcode() == Instruction::PHI;
1453 }
1454
/// Returns true if the single-def recipe \p SDR is a VPInstruction whose
/// opcode is Instruction::PHI.
1455 static inline bool classof(const VPSingleDefRecipe *SDR) {
1456 auto *VPI = dyn_cast<VPInstruction>(SDR);
1457 return VPI && VPI->getOpcode() == Instruction::PHI;
1458 }
1459
/// Create a new VPPhi with the same operands, debug location and name, and
/// copy the underlying value (unconditionally, so a null underlying value is
/// forwarded as-is). The VPPhi constructor passes empty flags and metadata
/// to VPInstruction, so those are not carried over by this clone.
1460 VPPhi *clone() override {
1461 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1462 PhiR->setUnderlyingValue(getUnderlyingValue());
1463 return PhiR;
1464 }
1465
1466 void execute(VPTransformState &State) override;
1467
1468protected:
1469#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1470 /// Print the recipe.
1471 void printRecipe(raw_ostream &O, const Twine &Indent,
1472 VPSlotTracker &SlotTracker) const override;
1473#endif
1474
1475 const VPRecipeBase *getAsRecipe() const override { return this; }
1476};
1477
1478 /// A recipe to wrap an original IR instruction not to be modified during
1479 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1480 /// Except for PHIs, VPIRInstructions cannot have any operands.
1482 Instruction &I;
1483
1484protected:
1485 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1486 /// subclasses may need to be created, e.g. VPIRPhi.
1488 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1489
1490public:
1491 ~VPIRInstruction() override = default;
1492
1493 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1494 /// VPIRInstruction.
1496
1497 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1498
1500 auto *R = create(I);
1501 for (auto *Op : operands())
1502 R->addOperand(Op);
1503 return R;
1504 }
1505
1506 void execute(VPTransformState &State) override;
1507
1508 /// Return the cost of this VPIRInstruction.
1510 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1511
1512 Instruction &getInstruction() const { return I; }
1513
1514 bool usesScalars(const VPValue *Op) const override {
1516 "Op must be an operand of the recipe");
1517 return true;
1518 }
1519
1520 bool usesFirstPartOnly(const VPValue *Op) const override {
1522 "Op must be an operand of the recipe");
1523 return true;
1524 }
1525
1526 bool usesFirstLaneOnly(const VPValue *Op) const override {
1528 "Op must be an operand of the recipe");
1529 return true;
1530 }
1531
1532 /// Update the recipe's first operand to the last lane of the last part of the
1533 /// operand using \p Builder. Must only be used for VPIRInstructions with at
1534 /// least one operand wrapping a PHINode.
1536
1537protected:
1538#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1539 /// Print the recipe.
1540 void printRecipe(raw_ostream &O, const Twine &Indent,
1541 VPSlotTracker &SlotTracker) const override;
1542#endif
1543};
1544
1545 /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1546 /// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1547/// allowed, and it is used to add a new incoming value for the single
1548/// predecessor VPBB.
1550 public VPPhiAccessors {
1552
/// Returns true if \p U is a VPIRInstruction whose wrapped IR instruction is
/// a PHINode.
1553 static inline bool classof(const VPRecipeBase *U) {
1554 auto *R = dyn_cast<VPIRInstruction>(U);
1555 return R && isa<PHINode>(R->getInstruction());
1556 }
1557
1559
1560 void execute(VPTransformState &State) override;
1561
1562protected:
1563#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1564 /// Print the recipe.
1565 void printRecipe(raw_ostream &O, const Twine &Indent,
1566 VPSlotTracker &SlotTracker) const override;
1567#endif
1568
1569 const VPRecipeBase *getAsRecipe() const override { return this; }
1570};
1571
1572/// VPWidenRecipe is a recipe for producing a widened instruction using the
1573/// opcode and operands of the recipe. This recipe covers most of the
1574/// traditional vectorization cases where each recipe transforms into a
1575/// vectorized version of itself.
1577 public VPIRMetadata {
1578 unsigned Opcode;
1579
1580public:
1582 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1583 DebugLoc DL = {})
1584 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1585 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1586 setUnderlyingValue(&I);
1587 }
1588
1589 ~VPWidenRecipe() override = default;
1590
/// Create a new VPWidenRecipe for the same underlying instruction and
/// operands. `*this` is passed twice: once as the flags argument and once as
/// the metadata argument. getUnderlyingInstr() is dereferenced without a
/// null check; the constructor sets the underlying value to its instruction
/// argument.
1591 VPWidenRecipe *clone() override {
1592 return new VPWidenRecipe(*getUnderlyingInstr(), operands(), *this, *this,
1593 getDebugLoc());
1594 }
1595
1596 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1597
1598 /// Produce a widened instruction using the opcode and operands of the recipe,
1599 /// processing State.VF elements.
1600 void execute(VPTransformState &State) override;
1601
1602 /// Return the cost of this VPWidenRecipe.
1603 InstructionCost computeCost(ElementCount VF,
1604 VPCostContext &Ctx) const override;
1605
1606 unsigned getOpcode() const { return Opcode; }
1607
1608protected:
1609#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1610 /// Print the recipe.
1611 void printRecipe(raw_ostream &O, const Twine &Indent,
1612 VPSlotTracker &SlotTracker) const override;
1613#endif
1614
1615 /// Returns true if the recipe only uses the first lane of operand \p Op.
1616 bool usesFirstLaneOnly(const VPValue *Op) const override {
1618 "Op must be an operand of the recipe");
1619 return Opcode == Instruction::Select && Op == getOperand(0) &&
1620 Op->isDefinedOutsideLoopRegions();
1621 }
1622};
1623
1624/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1626 /// Cast instruction opcode.
1627 Instruction::CastOps Opcode;
1628
1629 /// Result type for the cast.
1630 Type *ResultTy;
1631
1632public:
1634 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1635 const VPIRMetadata &Metadata = {},
1637 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1638 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1639 assert(flagsValidForOpcode(Opcode) &&
1640 "Set flags not supported for the provided opcode");
1642 }
1643
1644 ~VPWidenCastRecipe() override = default;
1645
1647 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1649 *this, *this, getDebugLoc());
1650 }
1651
1652 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1653
1654 /// Produce widened copies of the cast.
1655 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1656
1657 /// Return the cost of this VPWidenCastRecipe.
1659 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1660
1661 Instruction::CastOps getOpcode() const { return Opcode; }
1662
1663 /// Returns the result type of the cast.
1664 Type *getResultType() const { return ResultTy; }
1665
1666protected:
1667#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1668 /// Print the recipe.
1669 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1670 VPSlotTracker &SlotTracker) const override;
1671#endif
1672};
1673
1674/// A recipe for widening vector intrinsics.
1676 /// ID of the vector intrinsic to widen.
1677 Intrinsic::ID VectorIntrinsicID;
1678
1679 /// Scalar return type of the intrinsic.
1680 Type *ResultTy;
1681
1682 /// True if the intrinsic may read from memory.
1683 bool MayReadFromMemory;
1684
1685 /// True if the intrinsic may write to memory.
1686 bool MayWriteToMemory;
1687
1688 /// True if the intrinsic may have side-effects.
1689 bool MayHaveSideEffects;
1690
1691public:
1693 ArrayRef<VPValue *> CallArguments, Type *Ty,
1694 const VPIRFlags &Flags = {},
1695 const VPIRMetadata &MD = {},
1697 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1698 DL),
1699 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1700 MayReadFromMemory(CI.mayReadFromMemory()),
1701 MayWriteToMemory(CI.mayWriteToMemory()),
1702 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1703 setUnderlyingValue(&CI);
1704 }
1705
1707 ArrayRef<VPValue *> CallArguments, Type *Ty,
1708 const VPIRFlags &Flags = {},
1709 const VPIRMetadata &Metadata = {},
1711 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1712 DL),
1713 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1714 ResultTy(Ty) {
1715 LLVMContext &Ctx = Ty->getContext();
1716 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1717 MemoryEffects ME = Attrs.getMemoryEffects();
1718 MayReadFromMemory = !ME.onlyWritesMemory();
1719 MayWriteToMemory = !ME.onlyReadsMemory();
1720 MayHaveSideEffects = MayWriteToMemory ||
1721 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1722 !Attrs.hasAttribute(Attribute::WillReturn);
1723 }
1724
1725 ~VPWidenIntrinsicRecipe() override = default;
1726
1728 if (Value *CI = getUnderlyingValue())
1729 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1730 operands(), ResultTy, *this, *this,
1731 getDebugLoc());
1732 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1733 *this, *this, getDebugLoc());
1734 }
1735
1736 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1737
1738 /// Produce a widened version of the vector intrinsic.
1739 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1740
1741 /// Return the cost of this vector intrinsic.
1743 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1744
1745 /// Return the ID of the intrinsic.
1746 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1747
1748 /// Return the scalar return type of the intrinsic.
1749 Type *getResultType() const { return ResultTy; }
1750
1751 /// Return the name of the intrinsic as a string.
1753
1754 /// Returns true if the intrinsic may read from memory.
1755 bool mayReadFromMemory() const { return MayReadFromMemory; }
1756
1757 /// Returns true if the intrinsic may write to memory.
1758 bool mayWriteToMemory() const { return MayWriteToMemory; }
1759
1760 /// Returns true if the intrinsic may have side-effects.
1761 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1762
1763 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1764
1765protected:
1766#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1767 /// Print the recipe.
1768 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1769 VPSlotTracker &SlotTracker) const override;
1770#endif
1771};
1772
1773/// A recipe for widening Call instructions using library calls.
1775 public VPIRMetadata {
1776 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1777 /// between a given VF and the chosen vectorized variant, so there will be a
1778 /// different VPlan for each VF with a valid variant.
1779 Function *Variant;
1780
1781public:
1783 ArrayRef<VPValue *> CallArguments,
1784 const VPIRFlags &Flags = {},
1785 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1786 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1787 VPIRMetadata(Metadata), Variant(Variant) {
1788 setUnderlyingValue(UV);
1789 assert(
1790 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1791 "last operand must be the called function");
1792 }
1793
1794 ~VPWidenCallRecipe() override = default;
1795
1797 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1798 *this, *this, getDebugLoc());
1799 }
1800
1801 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1802
1803 /// Produce a widened version of the call instruction.
1804 void execute(VPTransformState &State) override;
1805
1806 /// Return the cost of this VPWidenCallRecipe.
1807 InstructionCost computeCost(ElementCount VF,
1808 VPCostContext &Ctx) const override;
1809
1813
1816
1817protected:
1818#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1819 /// Print the recipe.
1820 void printRecipe(raw_ostream &O, const Twine &Indent,
1821 VPSlotTracker &SlotTracker) const override;
1822#endif
1823};
1824
1825/// A recipe representing a sequence of load -> update -> store as part of
1826/// a histogram operation. This means there may be aliasing between vector
1827/// lanes, which is handled by the llvm.experimental.vector.histogram family
1828/// of intrinsics. The only update operations currently supported are
1829/// 'add' and 'sub' where the other term is loop-invariant.
1831 /// Opcode of the update operation, currently either add or sub.
1832 unsigned Opcode;
1833
1834public:
1835 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1837 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1838
1839 ~VPHistogramRecipe() override = default;
1840
1842 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1843 }
1844
1845 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1846
1847 /// Produce a vectorized histogram operation.
1848 void execute(VPTransformState &State) override;
1849
1850 /// Return the cost of this VPHistogramRecipe.
1852 VPCostContext &Ctx) const override;
1853
1854 unsigned getOpcode() const { return Opcode; }
1855
1856 /// Return the mask operand if one was provided, or a null pointer if all
1857 /// lanes should be executed unconditionally. The mask is taken to be present
/// exactly when the recipe has three operands, in which case it is the
/// operand at index 2.
1858 VPValue *getMask() const {
1859 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1860 }
1861
1862protected:
1863#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1864 /// Print the recipe.
1865 void printRecipe(raw_ostream &O, const Twine &Indent,
1866 VPSlotTracker &SlotTracker) const override;
1867#endif
1868};
1869
1870/// A recipe for handling GEP instructions.
1872 Type *SourceElementTy;
1873
/// Returns true if operand 0 (the GEP's pointer operand, per this method's
/// name) is defined outside of the loop regions.
1874 bool isPointerLoopInvariant() const {
1875 return getOperand(0)->isDefinedOutsideLoopRegions();
1876 }
1877
/// Returns true if the index with number \p I is defined outside of the loop
/// regions. Indices are offset by one because operand 0 is the pointer
/// operand, so index \p I lives at operand I + 1.
1878 bool isIndexLoopInvariant(unsigned I) const {
1879 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1880 }
1881
1882public:
1884 const VPIRFlags &Flags = {},
1886 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1887 SourceElementTy(GEP->getSourceElementType()) {
1888 setUnderlyingValue(GEP);
1890 (void)Metadata;
1892 assert(Metadata.empty() && "unexpected metadata on GEP");
1893 }
1894
1895 ~VPWidenGEPRecipe() override = default;
1896
1899 operands(), *this, getDebugLoc());
1900 }
1901
1902 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1903
1904 /// This recipe generates a GEP instruction.
1905 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1906
1907 /// Generate the gep nodes.
1908 void execute(VPTransformState &State) override;
1909
1910 Type *getSourceElementType() const { return SourceElementTy; }
1911
1912 /// Return the cost of this VPWidenGEPRecipe.
1914 VPCostContext &Ctx) const override {
1915 // TODO: Compute accurate cost after retiring the legacy cost model.
1916 return 0;
1917 }
1918
1919 /// Returns true if the recipe only uses the first lane of operand \p Op.
1920 bool usesFirstLaneOnly(const VPValue *Op) const override;
1921
1922protected:
1923#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1924 /// Print the recipe.
1925 void printRecipe(raw_ostream &O, const Twine &Indent,
1926 VPSlotTracker &SlotTracker) const override;
1927#endif
1928};
1929
1930/// A recipe to compute a pointer to the last element of each part of a widened
1931/// memory access for widened memory accesses of IndexedTy. Used for
1932/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1934 public VPUnrollPartAccessor<2> {
1935 Type *IndexedTy;
1936
1937 /// The constant stride of the pointer computed by this recipe, expressed in
1938 /// units of IndexedTy.
1939 int64_t Stride;
1940
1941public:
1943 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1944 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1945 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1946 IndexedTy(IndexedTy), Stride(Stride) {
1947 assert(Stride < 0 && "Stride must be negative");
1948 }
1949
1950 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1951
1953 const VPValue *getVFValue() const { return getOperand(1); }
1954
1955 void execute(VPTransformState &State) override;
1956
1957 bool usesFirstLaneOnly(const VPValue *Op) const override {
1959 "Op must be an operand of the recipe");
1960 return true;
1961 }
1962
1963 /// Return the cost of this VPVectorEndPointerRecipe.
1965 VPCostContext &Ctx) const override {
1966 // TODO: Compute accurate cost after retiring the legacy cost model.
1967 return 0;
1968 }
1969
1970 /// Returns true if the recipe only uses the first part of operand \p Op.
1971 bool usesFirstPartOnly(const VPValue *Op) const override {
1973 "Op must be an operand of the recipe");
1974 assert(getNumOperands() <= 2 && "must have at most two operands");
1975 return true;
1976 }
1977
1979 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1980 Stride, getGEPNoWrapFlags(),
1981 getDebugLoc());
1982 }
1983
1984protected:
1985#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1986 /// Print the recipe.
1987 void printRecipe(raw_ostream &O, const Twine &Indent,
1988 VPSlotTracker &SlotTracker) const override;
1989#endif
1990};
1991
1992/// A recipe to compute the pointers for widened memory accesses of \p
1993/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
1994/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
1996 Type *SourceElementTy;
1997
1998public:
1999 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2001 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, Ptr, GEPFlags, DL),
2002 SourceElementTy(SourceElementTy) {}
2003
2004 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
2005
2007 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2008 }
2009
2010 void execute(VPTransformState &State) override;
2011
2012 Type *getSourceElementType() const { return SourceElementTy; }
2013
2014 bool usesFirstLaneOnly(const VPValue *Op) const override {
2016 "Op must be an operand of the recipe");
2017 return true;
2018 }
2019
2020 /// Returns true if the recipe only uses the first part of operand \p Op.
2021 bool usesFirstPartOnly(const VPValue *Op) const override {
2023 "Op must be an operand of the recipe");
2024 assert(getNumOperands() <= 2 && "must have at most two operands");
2025 return true;
2026 }
2027
2029 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2031 if (auto *Off = getOffset())
2032 Clone->addOperand(Off);
2033 return Clone;
2034 }
2035
2036 /// Return the cost of this VPVectorPointerRecipe.
2038 VPCostContext &Ctx) const override {
2039 // TODO: Compute accurate cost after retiring the legacy cost model.
2040 return 0;
2041 }
2042
2043protected:
2044#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2045 /// Print the recipe.
2046 void printRecipe(raw_ostream &O, const Twine &Indent,
2047 VPSlotTracker &SlotTracker) const override;
2048#endif
2049};
2050
2051/// A pure virtual base class for all recipes modeling header phis, including
2052/// phis for first order recurrences, pointer inductions and reductions. The
2053/// start value is the first operand of the recipe and the incoming value from
2054/// the backedge is the second operand.
2055///
2056/// Inductions are modeled using the following sub-classes:
2057/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2058/// starting at a specified value (zero for the main vector loop, the resume
2059/// value for the epilogue vector loop) and stepping by 1. The induction
2060/// controls exiting of the vector loop by comparing against the vector trip
2061/// count. Produces a single scalar PHI for the induction value per
2062/// iteration.
2063/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2064/// floating point inductions with arbitrary start and step values. Produces
2065/// a vector PHI per-part.
2066/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2067/// value of an IV with different start and step values. Produces a single
2068/// scalar value per iteration
2069/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2070/// canonical or derived induction.
2071/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2072/// pointer induction. Produces either a vector PHI per-part or scalar values
2073/// per-lane based on the canonical induction.
2075 public VPPhiAccessors {
2076protected:
2077 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2078 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2079 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2080 UnderlyingInstr, DL) {}
2081
2082 const VPRecipeBase *getAsRecipe() const override { return this; }
2083
2084public:
2085 ~VPHeaderPHIRecipe() override = default;
2086
2087 /// Method to support type inquiry through isa, cast, and dyn_cast.
2088 static inline bool classof(const VPRecipeBase *R) {
2089 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2090 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2091 }
2092 static inline bool classof(const VPValue *V) {
2093 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2094 }
2095 static inline bool classof(const VPSingleDefRecipe *R) {
2096 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2097 }
2098
2099 /// Generate the phi nodes.
2100 void execute(VPTransformState &State) override = 0;
2101
2102 /// Return the cost of this header phi recipe.
2104 VPCostContext &Ctx) const override;
2105
2106 /// Returns the start value of the phi, if one is set.
2108 return getNumOperands() == 0 ? nullptr : getOperand(0);
2109 }
2111 return getNumOperands() == 0 ? nullptr : getOperand(0);
2112 }
2113
2114 /// Update the start value of the recipe.
2116
2117 /// Returns the incoming value from the loop backedge.
2119 return getOperand(1);
2120 }
2121
2122 /// Update the incoming value from the loop backedge.
2124
2125 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2126 /// to be a recipe.
2128 return *getBackedgeValue()->getDefiningRecipe();
2129 }
2130
2131protected:
2132#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2133 /// Print the recipe.
2134 void printRecipe(raw_ostream &O, const Twine &Indent,
2135 VPSlotTracker &SlotTracker) const override = 0;
2136#endif
2137};
2138
2139/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2140/// VPWidenPointerInductionRecipe), providing shared functionality, including
2141/// retrieving the step value, induction descriptor and original phi node.
2143 const InductionDescriptor &IndDesc;
2144
2145public:
2146 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2147 VPValue *Step, const InductionDescriptor &IndDesc,
2148 DebugLoc DL)
2149 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2150 addOperand(Step);
2151 }
2152
2153 static inline bool classof(const VPRecipeBase *R) {
2154 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2155 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2156 }
2157
2158 static inline bool classof(const VPValue *V) {
2159 auto *R = V->getDefiningRecipe();
2160 return R && classof(R);
2161 }
2162
2163 static inline bool classof(const VPSingleDefRecipe *R) {
2164 return classof(static_cast<const VPRecipeBase *>(R));
2165 }
2166
2167 void execute(VPTransformState &State) override = 0;
2168
2169 /// Returns the start value of the induction.
2171
2172 /// Returns the step value of the induction.
2174 const VPValue *getStepValue() const { return getOperand(1); }
2175
2176 /// Update the step value of the recipe.
2177 void setStepValue(VPValue *V) { setOperand(1, V); }
2178
2180 const VPValue *getVFValue() const { return getOperand(2); }
2181
2182 /// Returns the number of incoming values, also number of incoming blocks.
2183 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2184 /// incoming value, its start value.
2185 unsigned getNumIncoming() const override { return 1; }
2186
2188
2189 /// Returns the induction descriptor for the recipe.
2190 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2191
2193 // TODO: All operands of base recipe must exist and be at same index in
2194 // derived recipe.
2196 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2197 }
2198
2200 // TODO: All operands of base recipe must exist and be at same index in
2201 // derived recipe.
2203 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2204 }
2205
2206 /// Returns true if the recipe only uses the first lane of operand \p Op.
2207 bool usesFirstLaneOnly(const VPValue *Op) const override {
2209 "Op must be an operand of the recipe");
2210 // The recipe creates its own wide start value, so it only requests the
2211 // first lane of the operand.
2212 // TODO: Remove once creating the start value is modeled separately.
2213 return Op == getStartValue() || Op == getStepValue();
2214 }
2215};
2216
2217/// A recipe for handling phi nodes of integer and floating-point inductions,
2218/// producing their vector values. This is an abstract recipe and must be
2219/// converted to concrete recipes before executing.
2221 public VPIRFlags {
2222 TruncInst *Trunc;
2223
2224 // If this recipe is unrolled it will have 2 additional operands.
2225 bool isUnrolled() const { return getNumOperands() == 5; }
2226
2227public:
2229 VPValue *VF, const InductionDescriptor &IndDesc,
2230 const VPIRFlags &Flags, DebugLoc DL)
2231 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2232 Step, IndDesc, DL),
2233 VPIRFlags(Flags), Trunc(nullptr) {
2234 addOperand(VF);
2235 }
2236
2238 VPValue *VF, const InductionDescriptor &IndDesc,
2239 TruncInst *Trunc, const VPIRFlags &Flags,
2240 DebugLoc DL)
2241 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2242 Step, IndDesc, DL),
2243 VPIRFlags(Flags), Trunc(Trunc) {
2244 addOperand(VF);
2246 (void)Metadata;
2247 if (Trunc)
2249 assert(Metadata.empty() && "unexpected metadata on Trunc");
2250 }
2251
2253
2259
2260 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2261
2262 void execute(VPTransformState &State) override {
2263 llvm_unreachable("cannot execute this recipe, should be expanded via "
2264 "expandVPWidenIntOrFpInductionRecipe");
2265 }
2266
2267 /// Returns the start value of the induction.
2269
2270 /// If the recipe has been unrolled, return the VPValue for the induction
2271 /// increment, otherwise return null.
2273 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2274 }
2275
2276 /// Returns the number of incoming values, also number of incoming blocks.
2277 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2278 /// incoming value, its start value.
2279 unsigned getNumIncoming() const override { return 1; }
2280
2281 /// Returns the first defined value as TruncInst, if it is one or nullptr
2282 /// otherwise.
2283 TruncInst *getTruncInst() { return Trunc; }
2284 const TruncInst *getTruncInst() const { return Trunc; }
2285
2286 /// Returns true if the induction is canonical, i.e. starting at 0 and
2287 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2288 /// same type as the canonical induction.
2289 bool isCanonical() const;
2290
2291 /// Returns the scalar type of the induction.
2293 return Trunc ? Trunc->getType() : getStartValue()->getType();
2294 }
2295
2296 /// Returns the VPValue representing the value of this induction at
2297 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2298 /// take place.
2300 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2301 }
2302
2303protected:
2304#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2305 /// Print the recipe.
2306 void printRecipe(raw_ostream &O, const Twine &Indent,
2307 VPSlotTracker &SlotTracker) const override;
2308#endif
2309};
2310
2312public:
2313 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2314 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2315 /// VF*UF.
2317 VPValue *NumUnrolledElems,
2318 const InductionDescriptor &IndDesc, DebugLoc DL)
2319 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2320 Step, IndDesc, DL) {
2321 addOperand(NumUnrolledElems);
2322 }
2323
2325
2331
2332 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2333
2334 /// Generate vector values for the pointer induction.
2335 void execute(VPTransformState &State) override {
2336 llvm_unreachable("cannot execute this recipe, should be expanded via "
2337 "expandVPWidenPointerInduction");
2338 };
2339
2340 /// Returns true if only scalar values will be generated.
2341 bool onlyScalarsGenerated(bool IsScalable);
2342
2343protected:
2344#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2345 /// Print the recipe.
2346 void printRecipe(raw_ostream &O, const Twine &Indent,
2347 VPSlotTracker &SlotTracker) const override;
2348#endif
2349};
2350
2351/// A recipe for widened phis. Incoming values are operands of the recipe and
2352/// their operand index corresponds to the incoming predecessor block. If the
2353/// recipe is placed in an entry block to a (non-replicate) region, it must have
2354/// exactly 2 incoming values, the first from the predecessor of the region and
2355/// the second from the exiting block of the region.
2357 public VPPhiAccessors {
2358 /// Name to use for the generated IR instruction for the widened phi.
2359 std::string Name;
2360
2361public:
2362 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2363 /// debug location \p DL.
2364 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2365 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2366 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2367 if (Start)
2368 addOperand(Start);
2369 }
2370
2373 getOperand(0), getDebugLoc(), Name);
2375 C->addOperand(Op);
2376 return C;
2377 }
2378
2379 ~VPWidenPHIRecipe() override = default;
2380
2381 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2382
2383 /// Generate the phi/select nodes.
2384 void execute(VPTransformState &State) override;
2385
2386 /// Return the cost of this VPWidenPHIRecipe.
2388 VPCostContext &Ctx) const override;
2389
2390protected:
2391#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2392 /// Print the recipe.
2393 void printRecipe(raw_ostream &O, const Twine &Indent,
2394 VPSlotTracker &SlotTracker) const override;
2395#endif
2396
2397 const VPRecipeBase *getAsRecipe() const override { return this; }
2398};
2399
2400/// A recipe for handling first-order recurrence phis. The start value is the
2401/// first operand of the recipe and the incoming value from the backedge is the
2402/// second operand.
2405 VPValue &BackedgeValue)
2406 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {
2407 addOperand(&BackedgeValue);
2408 }
2409
2410 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2411
2416
2417 void execute(VPTransformState &State) override;
2418
2419 /// Return the cost of this first-order recurrence phi recipe.
2421 VPCostContext &Ctx) const override;
2422
2423 /// Returns true if the recipe only uses the first lane of operand \p Op.
2424 bool usesFirstLaneOnly(const VPValue *Op) const override {
2426 "Op must be an operand of the recipe");
2427 return Op == getStartValue();
2428 }
2429
2430protected:
2431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2432 /// Print the recipe.
2433 void printRecipe(raw_ostream &O, const Twine &Indent,
2434 VPSlotTracker &SlotTracker) const override;
2435#endif
2436};
2437
2438/// Possible variants of a reduction.
2439
2440/// This reduction is ordered and in-loop.
2441struct RdxOrdered {};
2442/// This reduction is in-loop.
2443struct RdxInLoop {};
2444/// This reduction is unordered with the partial result scaled down by some
2445/// factor.
2448};
2449using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2450
2451inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2452 unsigned ScaleFactor) {
2453 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2454 if (Ordered)
2455 return RdxOrdered{};
2456 if (InLoop)
2457 return RdxInLoop{};
2458 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2459}
2460
2461/// A recipe for handling reduction phis. The start value is the first operand
2462/// of the recipe and the incoming value from the backedge is the second
2463/// operand.
2465 public VPUnrollPartAccessor<2> {
2466 /// The recurrence kind of the reduction.
2467 const RecurKind Kind;
2468
2469 ReductionStyle Style;
2470
2471 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2472 /// patterns for argmin/argmax).
2473 /// TODO: Also support cases where the phi itself has a single use, but its
2474 /// compare has multiple uses.
2475 bool HasUsesOutsideReductionChain;
2476
2477public:
2478 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2480 VPValue &BackedgeValue, ReductionStyle Style,
2481 bool HasUsesOutsideReductionChain = false)
2482 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2483 Style(Style),
2484 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2485 addOperand(&BackedgeValue);
2486 }
2487
2488 ~VPReductionPHIRecipe() override = default;
2489
2491 return new VPReductionPHIRecipe(
2493 *getOperand(0), *getBackedgeValue(), Style,
2494 HasUsesOutsideReductionChain);
2495 }
2496
2497 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2498
2499 /// Generate the phi/select nodes.
2500 void execute(VPTransformState &State) override;
2501
2502 /// Get the factor that the VF of this recipe's output should be scaled by, or
2503 /// 1 if it isn't scaled.
2504 unsigned getVFScaleFactor() const {
2505 auto *Partial = std::get_if<RdxUnordered>(&Style);
2506 return Partial ? Partial->VFScaleFactor : 1;
2507 }
2508
2509 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2510 /// > 1.
2511 void setVFScaleFactor(unsigned ScaleFactor) {
2512 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2513 Style = RdxUnordered{ScaleFactor};
2514 }
2515
2516 /// Returns the number of incoming values, also number of incoming blocks.
2517 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2518 /// incoming value, its start value.
2519 unsigned getNumIncoming() const override { return 2; }
2520
2521 /// Returns the recurrence kind of the reduction.
2522 RecurKind getRecurrenceKind() const { return Kind; }
2523
2524 /// Returns true, if the phi is part of an ordered reduction.
2525 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2526
2527 /// Returns true if the phi is part of an in-loop reduction.
2528 bool isInLoop() const {
2529 return std::holds_alternative<RdxInLoop>(Style) ||
2530 std::holds_alternative<RdxOrdered>(Style);
2531 }
2532
2533 /// Returns true if the reduction outputs a vector with a scaled down VF.
2534 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2535
2536 /// Returns true, if the phi is part of a multi-use reduction.
2538 return HasUsesOutsideReductionChain;
2539 }
2540
2541 /// Returns true if the recipe only uses the first lane of operand \p Op.
2542 bool usesFirstLaneOnly(const VPValue *Op) const override {
2544 "Op must be an operand of the recipe");
2545 return isOrdered() || isInLoop();
2546 }
2547
2548protected:
2549#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2550 /// Print the recipe.
2551 void printRecipe(raw_ostream &O, const Twine &Indent,
2552 VPSlotTracker &SlotTracker) const override;
2553#endif
2554};
2555
2556/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2557/// instructions.
2559public:
2560 /// The blend operation is a User of the incoming values and of their
2561 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2562 /// be omitted (implied by passing an odd number of operands) in which case
2563 /// all other incoming values are merged into it.
2565 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2566 assert(Operands.size() >= 2 && "Expected at least two operands!");
2567 }
2568
2573
2574 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2575
2576 /// A normalized blend is one that has an odd number of operands, whereby the
2577 /// first operand does not have an associated mask.
2578 bool isNormalized() const { return getNumOperands() % 2; }
2579
2580 /// Return the number of incoming values, taking into account when normalized
2581 /// the first incoming value will have no mask.
2582 unsigned getNumIncomingValues() const {
2583 return (getNumOperands() + isNormalized()) / 2;
2584 }
2585
2586 /// Return incoming value number \p Idx.
2587 VPValue *getIncomingValue(unsigned Idx) const {
2588 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2589 }
2590
2591 /// Return mask number \p Idx.
2592 VPValue *getMask(unsigned Idx) const {
2593 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2594 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2595 }
2596
2597 /// Set mask number \p Idx to \p V.
2598 void setMask(unsigned Idx, VPValue *V) {
2599 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2600 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2601 }
2602
2603 void execute(VPTransformState &State) override {
2604 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2605 }
2606
2607 /// Return the cost of this VPBlendRecipe.
2608 InstructionCost computeCost(ElementCount VF,
2609 VPCostContext &Ctx) const override;
2610
2611 /// Returns true if the recipe only uses the first lane of operand \p Op.
2612 bool usesFirstLaneOnly(const VPValue *Op) const override {
2614 "Op must be an operand of the recipe");
2615 // Recursing through Blend recipes only, must terminate at header phis at the
2616 // latest.
2617 return all_of(users(),
2618 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2619 }
2620
2621protected:
2622#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2623 /// Print the recipe.
2624 void printRecipe(raw_ostream &O, const Twine &Indent,
2625 VPSlotTracker &SlotTracker) const override;
2626#endif
2627};
2628
2629/// A common base class for interleaved memory operations.
2630/// An Interleaved memory operation is a memory access method that combines
2631/// multiple strided loads/stores into a single wide load/store with shuffles.
2632/// The first operand is the start address. The optional operands are, in order,
2633/// the stored values and the mask.
2635 public VPIRMetadata {
2637
2638 /// Indicates if the interleave group is in a conditional block and requires a
2639 /// mask.
2640 bool HasMask = false;
2641
2642 /// Indicates if gaps between members of the group need to be masked out or if
2643 /// unused gaps can be loaded speculatively.
2644 bool NeedsMaskForGaps = false;
2645
2646protected:
2647 VPInterleaveBase(const unsigned char SC,
2649 ArrayRef<VPValue *> Operands,
2650 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2651 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2652 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2653 NeedsMaskForGaps(NeedsMaskForGaps) {
2654 // TODO: extend the masked interleaved-group support to reversed access.
2655 assert((!Mask || !IG->isReverse()) &&
2656 "Reversed masked interleave-group not supported.");
2657 if (StoredValues.empty()) {
2658 for (unsigned I = 0; I < IG->getFactor(); ++I)
2659 if (Instruction *Inst = IG->getMember(I)) {
2660 assert(!Inst->getType()->isVoidTy() && "must have result");
2661 new VPRecipeValue(this, Inst);
2662 }
2663 } else {
2664 for (auto *SV : StoredValues)
2665 addOperand(SV);
2666 }
2667 if (Mask) {
2668 HasMask = true;
2669 addOperand(Mask);
2670 }
2671 }
2672
2673public:
2674 VPInterleaveBase *clone() override = 0;
2675
2676 static inline bool classof(const VPRecipeBase *R) {
2677 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2678 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2679 }
2680
2681 static inline bool classof(const VPUser *U) {
2682 auto *R = dyn_cast<VPRecipeBase>(U);
2683 return R && classof(R);
2684 }
2685
2686 /// Return the address accessed by this recipe.
2687 VPValue *getAddr() const {
2688 return getOperand(0); // Address is the 1st, mandatory operand.
2689 }
2690
2691 /// Return the mask used by this recipe. Note that a full mask is represented
2692 /// by a nullptr.
2693 VPValue *getMask() const {
2694 // Mask is optional and the last operand.
2695 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2696 }
2697
2698 /// Return true if the access needs a mask because of the gaps.
2699 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2700
2702
2703 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2704
2705 void execute(VPTransformState &State) override {
2706 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2707 }
2708
2709 /// Return the cost of this recipe.
2710 InstructionCost computeCost(ElementCount VF,
2711 VPCostContext &Ctx) const override;
2712
2713 /// Returns true if the recipe only uses the first lane of operand \p Op.
2714 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2715
2716 /// Returns the number of stored operands of this interleave group. Returns 0
2717 /// for load interleave groups.
2718 virtual unsigned getNumStoreOperands() const = 0;
2719
2720 /// Return the VPValues stored by this interleave group. If it is a load
2721 /// interleave group, return an empty ArrayRef.
2723 return ArrayRef<VPValue *>(op_end() -
2724 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2726 }
2727};
2728
2729/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2730/// or stores into one wide load/store and shuffles. The first operand of a
2731/// VPInterleave recipe is the address, followed by the stored values, followed
2732/// by an optional mask.
2734public:
2736 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2737 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2738 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2739 NeedsMaskForGaps, MD, DL) {}
2740
2741 ~VPInterleaveRecipe() override = default;
2742
2746 needsMaskForGaps(), *this, getDebugLoc());
2747 }
2748
2749 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2750
2751 /// Generate the wide load or store, and shuffles.
2752 void execute(VPTransformState &State) override;
2753
2754 bool usesFirstLaneOnly(const VPValue *Op) const override {
2756 "Op must be an operand of the recipe");
2757 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2758 }
2759
2760 unsigned getNumStoreOperands() const override {
2761 return getNumOperands() - (getMask() ? 2 : 1);
2762 }
2763
2764protected:
2765#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2766 /// Print the recipe.
2767 void printRecipe(raw_ostream &O, const Twine &Indent,
2768 VPSlotTracker &SlotTracker) const override;
2769#endif
2770};
2771
2772/// A recipe for interleaved memory operations with vector-predication
2773/// intrinsics. The first operand is the address, the second operand is the
2774/// explicit vector length. Stored values and mask are optional operands.
2776public:
2778 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2779 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2780 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2781 R.getDebugLoc()) {
2782 assert(!getInterleaveGroup()->isReverse() &&
2783 "Reversed interleave-group with tail folding is not supported.");
2784 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2785 "supported for scalable vector.");
2786 }
2787
2788 ~VPInterleaveEVLRecipe() override = default;
2789
2791 llvm_unreachable("cloning not implemented yet");
2792 }
2793
2794 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2795
2796 /// The VPValue of the explicit vector length.
2797 VPValue *getEVL() const { return getOperand(1); }
2798
2799 /// Generate the wide load or store, and shuffles.
2800 void execute(VPTransformState &State) override;
2801
2802 /// The recipe only uses the first lane of the address, and EVL operand.
2803 bool usesFirstLaneOnly(const VPValue *Op) const override {
2805 "Op must be an operand of the recipe");
2806 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2807 Op == getEVL();
2808 }
2809
2810 unsigned getNumStoreOperands() const override {
2811 return getNumOperands() - (getMask() ? 3 : 2);
2812 }
2813
2814protected:
2815#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2816 /// Print the recipe.
2817 void printRecipe(raw_ostream &O, const Twine &Indent,
2818 VPSlotTracker &SlotTracker) const override;
2819#endif
2820};
2821
2822/// A recipe to represent inloop, ordered or partial reduction operations. It
2823/// performs a reduction on a vector operand into a scalar (vector in the case
2824/// of a partial reduction) value, and adds the result to a chain. The Operands
2825/// are {ChainOp, VecOp, [Condition]}.
2827
2828 /// The recurrence kind for the reduction in question.
2829 RecurKind RdxKind;
2830 /// Whether the reduction is conditional.
2831 bool IsConditional = false;
2832 ReductionStyle Style;
2833
2834protected:
2835 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2837 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2838 ReductionStyle Style, DebugLoc DL)
2839 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2840 Style(Style) {
2841 if (CondOp) {
2842 IsConditional = true;
2843 addOperand(CondOp);
2844 }
2846 }
2847
2848public:
2850 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2852 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2853 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2854 DL) {}
2855
2857 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2859 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2860 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2861 DL) {}
2862
2863 ~VPReductionRecipe() override = default;
2864
2866 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2868 getCondOp(), Style, getDebugLoc());
2869 }
2870
2871 static inline bool classof(const VPRecipeBase *R) {
2872 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2873 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2874 }
2875
2876 static inline bool classof(const VPUser *U) {
2877 auto *R = dyn_cast<VPRecipeBase>(U);
2878 return R && classof(R);
2879 }
2880
2881 static inline bool classof(const VPValue *VPV) {
2882 const VPRecipeBase *R = VPV->getDefiningRecipe();
2883 return R && classof(R);
2884 }
2885
2886 static inline bool classof(const VPSingleDefRecipe *R) {
2887 return classof(static_cast<const VPRecipeBase *>(R));
2888 }
2889
2890 /// Generate the reduction in the loop.
2891 void execute(VPTransformState &State) override;
2892
2893 /// Return the cost of VPReductionRecipe.
2894 InstructionCost computeCost(ElementCount VF,
2895 VPCostContext &Ctx) const override;
2896
2897 /// Return the recurrence kind for the in-loop reduction.
2898 RecurKind getRecurrenceKind() const { return RdxKind; }
2899 /// Return true if the in-loop reduction is ordered.
2900 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2901 /// Return true if the in-loop reduction is conditional.
2902 bool isConditional() const { return IsConditional; };
2903 /// Returns true if the reduction outputs a vector with a scaled down VF.
2904 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2905 /// Returns true if the reduction is in-loop.
2906 bool isInLoop() const {
2907 return std::holds_alternative<RdxInLoop>(Style) ||
2908 std::holds_alternative<RdxOrdered>(Style);
2909 }
2910 /// The VPValue of the scalar Chain being accumulated.
2911 VPValue *getChainOp() const { return getOperand(0); }
2912 /// The VPValue of the vector value to be reduced.
2913 VPValue *getVecOp() const { return getOperand(1); }
2914 /// The VPValue of the condition for the block.
2916 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2917 }
2918 /// Get the factor that the VF of this recipe's output should be scaled by, or
2919 /// 1 if it isn't scaled.
2920 unsigned getVFScaleFactor() const {
// Only the RdxUnordered alternative of Style carries a VFScaleFactor; for any
// other alternative std::get_if yields null and the factor defaults to 1.
2921 auto *Partial = std::get_if<RdxUnordered>(&Style);
2922 return Partial ? Partial->VFScaleFactor : 1;
2923 }
2924
2925protected:
2926#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2927 /// Print the recipe.
2928 void printRecipe(raw_ostream &O, const Twine &Indent,
2929 VPSlotTracker &SlotTracker) const override;
2930#endif
2931};
2932
2933/// A recipe to represent inloop reduction operations with vector-predication
2934/// intrinsics, performing a reduction on a vector operand with the explicit
2935/// vector length (EVL) into a scalar value, and adding the result to a chain.
2936/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2938public:
2942 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2943 R.getFastMathFlags(),
2945 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2946 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2947
2948 ~VPReductionEVLRecipe() override = default;
2949
2951 llvm_unreachable("cloning not implemented yet");
2952 }
2953
2954 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2955
2956 /// Generate the reduction in the loop.
2957 void execute(VPTransformState &State) override;
2958
2959 /// The VPValue of the explicit vector length.
2960 VPValue *getEVL() const { return getOperand(2); }
2961
2962 /// Returns true if the recipe only uses the first lane of operand \p Op.
2963 bool usesFirstLaneOnly(const VPValue *Op) const override {
2965 "Op must be an operand of the recipe");
2966 return Op == getEVL();
2967 }
2968
2969protected:
2970#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2971 /// Print the recipe.
2972 void printRecipe(raw_ostream &O, const Twine &Indent,
2973 VPSlotTracker &SlotTracker) const override;
2974#endif
2975};
2976
2977/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2978/// copies of the original scalar type, one per lane, instead of producing a
2979/// single copy of widened type for all lanes. If the instruction is known to be
2980/// a single scalar, only one copy, per lane zero, will be generated.
2982 public VPIRMetadata {
2983 /// Indicator if only a single replica per lane is needed.
2984 bool IsSingleScalar;
2985
2986 /// Indicator if the replicas are also predicated.
2987 bool IsPredicated;
2988
2989public:
2991 bool IsSingleScalar, VPValue *Mask = nullptr,
2992 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2993 DebugLoc DL = DebugLoc::getUnknown())
2994 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2995 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2996 IsPredicated(Mask) {
2997 setUnderlyingValue(I);
2998 if (Mask)
2999 addOperand(Mask);
3000 }
3001
3002 ~VPReplicateRecipe() override = default;
3003
3005 auto *Copy = new VPReplicateRecipe(
3006 getUnderlyingInstr(), operands(), IsSingleScalar,
3007 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3008 Copy->transferFlags(*this);
3009 return Copy;
3010 }
3011
3012 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
3013
3014 /// Generate replicas of the desired Ingredient. Replicas will be generated
3015 /// for all parts and lanes unless a specific part and lane are specified in
3016 /// the \p State.
3017 void execute(VPTransformState &State) override;
3018
3019 /// Return the cost of this VPReplicateRecipe.
3020 InstructionCost computeCost(ElementCount VF,
3021 VPCostContext &Ctx) const override;
3022
/// Return the value of the IsSingleScalar flag.
3023 bool isSingleScalar() const { return IsSingleScalar; }
3024
/// Return true if the replicas are predicated. The flag is initialized from
/// whether a non-null mask was passed to the constructor.
3025 bool isPredicated() const { return IsPredicated; }
3026
3027 /// Returns true if the recipe only uses the first lane of operand \p Op.
3028 bool usesFirstLaneOnly(const VPValue *Op) const override {
3030 "Op must be an operand of the recipe");
3031 return isSingleScalar();
3032 }
3033
3034 /// Returns true if the recipe uses scalars of operand \p Op.
3035 bool usesScalars(const VPValue *Op) const override {
3037 "Op must be an operand of the recipe");
3038 return true;
3039 }
3040
3041 /// Returns true if the recipe is used by a widened recipe via an intervening
3042 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3043 /// in a vector.
3044 bool shouldPack() const;
3045
3046 /// Return the mask of a predicated VPReplicateRecipe.
3048 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3049 return getOperand(getNumOperands() - 1);
3050 }
3051
/// Return the opcode of the underlying instruction.
3052 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3053
3054protected:
3055#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3056 /// Print the recipe.
3057 void printRecipe(raw_ostream &O, const Twine &Indent,
3058 VPSlotTracker &SlotTracker) const override;
3059#endif
3060};
3061
3062/// A recipe for generating conditional branches on the bits of a mask.
3064public:
3066 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3067
3070 }
3071
3072 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3073
3074 /// Generate the extraction of the appropriate bit from the block mask and the
3075 /// conditional branch.
3076 void execute(VPTransformState &State) override;
3077
3078 /// Return the cost of this VPBranchOnMaskRecipe.
3079 InstructionCost computeCost(ElementCount VF,
3080 VPCostContext &Ctx) const override;
3081
3082#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3083 /// Print the recipe.
3084 void printRecipe(raw_ostream &O, const Twine &Indent,
3085 VPSlotTracker &SlotTracker) const override {
3086 O << Indent << "BRANCH-ON-MASK ";
3088 }
3089#endif
3090
3091 /// Returns true if the recipe uses scalars of operand \p Op.
3092 bool usesScalars(const VPValue *Op) const override {
3094 "Op must be an operand of the recipe");
3095 return true;
3096 }
3097};
3098
3099/// A recipe to combine multiple recipes into a single 'expression' recipe,
3100/// which should be considered a single entity for cost-modeling and transforms.
3101/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3102/// expression recipes, before execute. The individual expression recipes are
3103/// completely disconnected from the def-use graph of other recipes not part of
3104/// the expression. Def-use edges between pairs of expression recipes remain
3105/// intact, whereas every edge between an expression recipe and a recipe outside
3106/// the expression is elevated to connect the non-expression recipe with the
3107/// VPExpressionRecipe itself.
3108class VPExpressionRecipe : public VPSingleDefRecipe {
3109 /// Recipes included in this VPExpressionRecipe. This could contain
3110 /// duplicates.
3111 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3112
3113 /// Temporary VPValues used for external operands of the expression, i.e.
3114 /// operands not defined by recipes in the expression.
3115 SmallVector<VPValue *> LiveInPlaceholders;
3116
3117 enum class ExpressionTypes {
3118 /// Represents an inloop extended reduction operation, performing a
3119 /// reduction on an extended vector operand into a scalar value, and adding
3120 /// the result to a chain.
3121 ExtendedReduction,
3122 /// Represent an inloop multiply-accumulate reduction, multiplying the
3123 /// extended vector operands, performing a reduction.add on the result, and
3124 /// adding the scalar result to a chain.
3125 ExtMulAccReduction,
3126 /// Represent an inloop multiply-accumulate reduction, multiplying the
3127 /// vector operands, performing a reduction.add on the result, and adding
3128 /// the scalar result to a chain.
3129 MulAccReduction,
3130 /// Represent an inloop multiply-accumulate reduction, multiplying the
3131 /// extended vector operands, negating the multiplication, performing a
3132 /// reduction.add on the result, and adding the scalar result to a chain.
3133 ExtNegatedMulAccReduction,
3134 };
3135
3136 /// Type of the expression.
3137 ExpressionTypes ExpressionType;
3138
3139 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3140 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3141 /// in the expression) are replaced by temporary VPValues and the original
3142 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3143 /// as needed (excluding last) to ensure they are only used by other recipes
3144 /// in the expression.
3145 VPExpressionRecipe(ExpressionTypes ExpressionType,
3146 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3147
3148public:
3150 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3152 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3155 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3156 {Ext0, Ext1, Mul, Red}) {}
3159 VPReductionRecipe *Red)
3160 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3161 {Ext0, Ext1, Mul, Sub, Red}) {
3162 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3163 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3164 "Expected an add reduction");
3165 assert(getNumOperands() >= 3 && "Expected at least three operands");
3166 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3167 assert(SubConst && SubConst->isZero() &&
3168 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3169 }
3170
3172 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3173 for (auto *R : reverse(ExpressionRecipes)) {
3174 if (ExpressionRecipesSeen.insert(R).second)
3175 delete R;
3176 }
3177 for (VPValue *T : LiveInPlaceholders)
3178 delete T;
3179 }
3180
3181 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3182
/// Create a copy of this expression: clone every bundled recipe, rewire the
/// clones so they reference each other and the external operands instead of
/// the originals and the placeholders, and bundle them into a new
/// VPExpressionRecipe with the same ExpressionType.
3183 VPExpressionRecipe *clone() override {
3184 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3185 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
// Clone each recipe in order, so the clone of ExpressionRecipes[I] is stored
// at index I of the new list.
3186 for (auto *R : ExpressionRecipes)
3187 NewExpressiondRecipes.push_back(R->clone());
3188 for (auto *New : NewExpressiondRecipes) {
// Redirect any use of an original expression recipe to its clone.
3189 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3190 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3191 // Update placeholder operands in the cloned recipe to use the external
3192 // operands, to be internalized when the cloned expression is constructed.
3193 for (const auto &[Placeholder, OutsideOp] :
3194 zip(LiveInPlaceholders, operands()))
3195 New->replaceUsesOfWith(Placeholder, OutsideOp);
3196 }
3197 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3198 }
3199
3200 /// Return the VPValue to use to infer the result type of the recipe.
3202 unsigned OpIdx =
3203 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3204 : 1;
3205 return getOperand(getNumOperands() - OpIdx);
3206 }
3207
3208 /// Insert the recipes of the expression back into the VPlan, directly before
3209 /// the current recipe. Leaves the expression recipe empty, which must be
3210 /// removed before codegen.
3211 void decompose();
3212
/// Return the VF scale factor of the last recipe in the expression if that
/// recipe is a VPReductionRecipe, or 1 otherwise.
3213 unsigned getVFScaleFactor() const {
3214 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3215 return PR ? PR->getVFScaleFactor() : 1;
3216 }
3217
3218 /// Method for generating code, must not be called as this recipe is abstract.
3219 void execute(VPTransformState &State) override {
3220 llvm_unreachable("recipe must be removed before execute");
3221 }
3222
3224 VPCostContext &Ctx) const override;
3225
3226 /// Returns true if this expression contains recipes that may read from or
3227 /// write to memory.
3228 bool mayReadOrWriteMemory() const;
3229
3230 /// Returns true if this expression contains recipes that may have side
3231 /// effects.
3232 bool mayHaveSideEffects() const;
3233
3234 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3235 bool isSingleScalar() const;
3236
3237protected:
3238#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3239 /// Print the recipe.
3240 void printRecipe(raw_ostream &O, const Twine &Indent,
3241 VPSlotTracker &SlotTracker) const override;
3242#endif
3243};
3244
3245/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3246/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3247/// order to merge values that are set under such a branch and feed their uses.
3248/// The phi nodes can be scalar or vector depending on the users of the value.
3249/// This recipe works in concert with VPBranchOnMaskRecipe.
3251public:
3252 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs a phi
3253 /// node after merging back from a Branch-on-Mask.
3255 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3256 ~VPPredInstPHIRecipe() override = default;
3257
3259 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3260 }
3261
3262 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3263
3264 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3265 /// retain SSA form.
3266 void execute(VPTransformState &State) override;
3267
3268 /// Return the cost of this VPPredInstPHIRecipe.
3270 VPCostContext &Ctx) const override {
3271 // TODO: Compute accurate cost after retiring the legacy cost model.
3272 return 0;
3273 }
3274
3275 /// Returns true if the recipe uses scalars of operand \p Op.
3276 bool usesScalars(const VPValue *Op) const override {
3278 "Op must be an operand of the recipe");
3279 return true;
3280 }
3281
3282protected:
3283#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3284 /// Print the recipe.
3285 void printRecipe(raw_ostream &O, const Twine &Indent,
3286 VPSlotTracker &SlotTracker) const override;
3287#endif
3288};
3289
3290/// A common base class for widening memory operations. An optional mask can be
3291/// provided as the last operand.
3293 public VPIRMetadata {
3294protected:
3296
3297 /// Alignment information for this memory access.
3299
3300 /// Whether the accessed addresses are consecutive.
3302
3303 /// Whether the consecutive accessed addresses are in reverse order.
3305
3306 /// Whether the memory access is masked.
3307 bool IsMasked = false;
3308
/// Record \p Mask as the mask of this memory access by appending it as an
/// operand and setting IsMasked. A null \p Mask leaves the recipe unmasked.
/// Asserts that no mask has been set before.
3309 void setMask(VPValue *Mask) {
3310 assert(!IsMasked && "cannot re-set mask");
3311 if (!Mask)
3312 return;
3313 addOperand(Mask);
3314 IsMasked = true;
3315 }
3317 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3318 std::initializer_list<VPValue *> Operands,
3319 bool Consecutive, bool Reverse,
3320 const VPIRMetadata &Metadata, DebugLoc DL)
3321 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3323 Reverse(Reverse) {
3324 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3326 "Reversed acccess without VPVectorEndPointerRecipe address?");
3327 }
3328
3329public:
3331 llvm_unreachable("cloning not supported");
3332 }
3333
/// Return true if \p R has the VPDef ID of one of the widened memory recipes:
/// VPWidenLoadSC, VPWidenStoreSC, VPWidenLoadEVLSC or VPWidenStoreEVLSC.
3334 static inline bool classof(const VPRecipeBase *R) {
3335 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3336 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3337 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3338 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3339 }
3340
/// Return true if \p U is a VPRecipeBase (checked via dyn_cast) that is
/// accepted by the VPRecipeBase overload of classof.
3341 static inline bool classof(const VPUser *U) {
3342 auto *R = dyn_cast<VPRecipeBase>(U);
3343 return R && classof(R);
3344 }
3345
3346 /// Return whether the loaded-from / stored-to addresses are consecutive.
3347 bool isConsecutive() const { return Consecutive; }
3348
3349 /// Return whether the consecutive loaded/stored addresses are in reverse
3350 /// order.
3351 bool isReverse() const { return Reverse; }
3352
3353 /// Return the address accessed by this recipe.
3354 VPValue *getAddr() const { return getOperand(0); }
3355
3356 /// Returns true if the recipe is masked.
3357 bool isMasked() const { return IsMasked; }
3358
3359 /// Return the mask used by this recipe. Note that a full mask is represented
3360 /// by a nullptr.
3361 VPValue *getMask() const {
3362 // Mask is optional and therefore the last operand.
3363 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3364 }
3365
3366 /// Returns the alignment of the memory access.
3367 Align getAlign() const { return Alignment; }
3368
3369 /// Generate the wide load/store.
3370 void execute(VPTransformState &State) override {
// Reaching this implementation is treated as a bug; see the message below.
3371 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3372 }
3373
3374 /// Return the cost of this VPWidenMemoryRecipe.
3375 InstructionCost computeCost(ElementCount VF,
3376 VPCostContext &Ctx) const override;
3377
3379};
3380
3381/// A recipe for widening load operations, using the address to load from and an
3382/// optional mask.
3384 public VPRecipeValue {
3386 bool Consecutive, bool Reverse,
3387 const VPIRMetadata &Metadata, DebugLoc DL)
3388 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3389 Reverse, Metadata, DL),
3390 VPRecipeValue(this, &Load) {
3391 setMask(Mask);
3392 }
3393
3396 getMask(), Consecutive, Reverse, *this,
3397 getDebugLoc());
3398 }
3399
3400 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3401
3402 /// Generate a wide load or gather.
3403 void execute(VPTransformState &State) override;
3404
3405 /// Returns true if the recipe only uses the first lane of operand \p Op.
3406 bool usesFirstLaneOnly(const VPValue *Op) const override {
3408 "Op must be an operand of the recipe");
3409 // Widened, consecutive load operations only demand the first lane of
3410 // their address.
3411 return Op == getAddr() && isConsecutive();
3412 }
3413
3414protected:
3415#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3416 /// Print the recipe.
3417 void printRecipe(raw_ostream &O, const Twine &Indent,
3418 VPSlotTracker &SlotTracker) const override;
3419#endif
3420};
3421
3422/// A recipe for widening load operations with vector-predication intrinsics,
3423/// using the address to load from, the explicit vector length and an optional
3424/// mask.
3426 public VPRecipeValue {
3428 VPValue *Mask)
3429 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3430 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3431 L.getDebugLoc()),
3432 VPRecipeValue(this, &getIngredient()) {
3433 setMask(Mask);
3434 }
3435
3436 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3437
3438 /// Return the EVL operand.
3439 VPValue *getEVL() const { return getOperand(1); }
3440
3441 /// Generate the wide load or gather.
3442 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3443
3444 /// Return the cost of this VPWidenLoadEVLRecipe.
3446 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3447
3448 /// Returns true if the recipe only uses the first lane of operand \p Op.
3449 bool usesFirstLaneOnly(const VPValue *Op) const override {
3451 "Op must be an operand of the recipe");
3452 // Widened loads only demand the first lane of EVL and consecutive loads
3453 // only demand the first lane of their address.
3454 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3455 }
3456
3457protected:
3458#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3459 /// Print the recipe.
3460 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3461 VPSlotTracker &SlotTracker) const override;
3462#endif
3463};
3464
3465/// A recipe for widening store operations, using the stored value, the address
3466/// to store to and an optional mask.
3468 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3469 VPValue *Mask, bool Consecutive, bool Reverse,
3470 const VPIRMetadata &Metadata, DebugLoc DL)
3471 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3472 Consecutive, Reverse, Metadata, DL) {
3473 setMask(Mask);
3474 }
3475
3481
3482 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3483
3484 /// Return the value stored by this recipe.
3485 VPValue *getStoredValue() const { return getOperand(1); }
3486
3487 /// Generate a wide store or scatter.
3488 void execute(VPTransformState &State) override;
3489
3490 /// Returns true if the recipe only uses the first lane of operand \p Op.
3491 bool usesFirstLaneOnly(const VPValue *Op) const override {
3493 "Op must be an operand of the recipe");
3494 // Widened, consecutive stores only demand the first lane of their address,
3495 // unless the same operand is also stored.
3496 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3497 }
3498
3499protected:
3500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3501 /// Print the recipe.
3502 void printRecipe(raw_ostream &O, const Twine &Indent,
3503 VPSlotTracker &SlotTracker) const override;
3504#endif
3505};
3506
3507/// A recipe for widening store operations with vector-predication intrinsics,
3508/// using the value to store, the address to store to, the explicit vector
3509/// length and an optional mask.
3512 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3513 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3514 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3515 S.isReverse(), S, S.getDebugLoc()) {
3516 setMask(Mask);
3517 }
3518
3519 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3520
3521 /// Return the value stored by this recipe.
3522 VPValue *getStoredValue() const { return getOperand(1); }
3523
3524 /// Return the EVL operand.
3525 VPValue *getEVL() const { return getOperand(2); }
3526
3527 /// Generate the wide store or scatter.
3528 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3529
3530 /// Return the cost of this VPWidenStoreEVLRecipe.
3532 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3533
3534 /// Returns true if the recipe only uses the first lane of operand \p Op.
3535 bool usesFirstLaneOnly(const VPValue *Op) const override {
3537 "Op must be an operand of the recipe");
3538 if (Op == getEVL()) {
3539 assert(getStoredValue() != Op && "unexpected store of EVL");
3540 return true;
3541 }
3542 // Widened, consecutive memory operations only demand the first lane of
3543 // their address, unless the same operand is also stored. The latter can
3544 // happen with opaque pointers.
3545 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3546 }
3547
3548protected:
3549#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3550 /// Print the recipe.
3551 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3552 VPSlotTracker &SlotTracker) const override;
3553#endif
3554};
3555
3556/// Recipe to expand a SCEV expression.
3558 const SCEV *Expr;
3559
3560public:
3562 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3563
3564 ~VPExpandSCEVRecipe() override = default;
3565
/// Create a new VPExpandSCEVRecipe for the same SCEV expression.
3566 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3567
3568 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3569
/// Not executable: reaching this is a bug, since per the message below SCEV
/// expressions must be expanded before final execute.
3570 void execute(VPTransformState &State) override {
3571 llvm_unreachable("SCEV expressions must be expanded before final execute");
3572 }
3573
3574 /// Return the cost of this VPExpandSCEVRecipe.
3576 VPCostContext &Ctx) const override {
3577 // TODO: Compute accurate cost after retiring the legacy cost model.
3578 return 0;
3579 }
3580
/// Return the SCEV expression held by this recipe.
3581 const SCEV *getSCEV() const { return Expr; }
3582
3583protected:
3584#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3585 /// Print the recipe.
3586 void printRecipe(raw_ostream &O, const Twine &Indent,
3587 VPSlotTracker &SlotTracker) const override;
3588#endif
3589};
3590
3591/// Canonical scalar induction phi of the vector loop. Starting at the specified
3592/// start value (either 0 or the resume value when vectorizing the epilogue
3593/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3594/// canonical induction variable.
3596public:
3598 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3599
3600 ~VPCanonicalIVPHIRecipe() override = default;
3601
3604 R->addOperand(getBackedgeValue());
3605 return R;
3606 }
3607
3608 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3609
/// Not executable: per the message below, this recipe should have been
/// replaced by a scalar phi recipe before execution.
3610 void execute(VPTransformState &State) override {
3611 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3612 "scalar phi recipe");
3613 }
3614
3615 /// Returns the start value of the canonical induction.
3617
3618 /// Returns the scalar type of the induction.
3619 Type *getScalarType() const { return getStartValue()->getType(); }
3620
3621 /// Returns true if the recipe only uses the first lane of operand \p Op.
3622 bool usesFirstLaneOnly(const VPValue *Op) const override {
3624 "Op must be an operand of the recipe");
3625 return true;
3626 }
3627
3628 /// Returns true if the recipe only uses the first part of operand \p Op.
3629 bool usesFirstPartOnly(const VPValue *Op) const override {
3631 "Op must be an operand of the recipe");
3632 return true;
3633 }
3634
3635 /// Return the cost of this VPCanonicalIVPHIRecipe.
3637 VPCostContext &Ctx) const override {
3638 // For now, match the behavior of the legacy cost model.
3639 return 0;
3640 }
3641
3642protected:
3643#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3644 /// Print the recipe.
3645 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3646 VPSlotTracker &SlotTracker) const override;
3647#endif
3648};
3649
3650/// A recipe for generating the active lane mask for the vector loop that is
3651/// used to predicate the vector operations.
3653public:
3655 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3656 DL) {}
3657
3658 ~VPActiveLaneMaskPHIRecipe() override = default;
3659
3662 if (getNumOperands() == 2)
3663 R->addOperand(getOperand(1));
3664 return R;
3665 }
3666
3667 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3668
3669 /// Generate the active lane mask phi of the vector loop.
3670 void execute(VPTransformState &State) override;
3671
3672protected:
3673#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3674 /// Print the recipe.
3675 void printRecipe(raw_ostream &O, const Twine &Indent,
3676 VPSlotTracker &SlotTracker) const override;
3677#endif
3678};
3679
3680/// A recipe for generating the phi node for the current index of elements,
3681/// adjusted in accordance with EVL value. It starts at the start value of the
3682/// canonical induction and gets incremented by EVL in each iteration of the
3683/// vector loop.
3685public:
3687 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3688
3689 ~VPEVLBasedIVPHIRecipe() override = default;
3690
3692 llvm_unreachable("cloning not implemented yet");
3693 }
3694
3695 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3696
/// Not executable: per the message below, this recipe should have been
/// replaced by a scalar phi recipe before execution.
3697 void execute(VPTransformState &State) override {
3698 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3699 "scalar phi recipe");
3700 }
3701
3702 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3704 VPCostContext &Ctx) const override {
3705 // For now, match the behavior of the legacy cost model.
3706 return 0;
3707 }
3708
3709 /// Returns true if the recipe only uses the first lane of operand \p Op.
3710 bool usesFirstLaneOnly(const VPValue *Op) const override {
3712 "Op must be an operand of the recipe");
3713 return true;
3714 }
3715
3716protected:
3717#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3718 /// Print the recipe.
3719 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3720 VPSlotTracker &SlotTracker) const override;
3721#endif
3722};
3723
3724/// A Recipe for widening the canonical induction variable of the vector loop.
3726 public VPUnrollPartAccessor<1> {
3727public:
3729 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3730
3731 ~VPWidenCanonicalIVRecipe() override = default;
3732
3737
3738 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3739
3740 /// Generate a canonical vector induction variable of the vector loop, with
3741 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3742 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3743 void execute(VPTransformState &State) override;
3744
3745 /// Return the cost of this VPWidenCanonicalIVRecipe.
3747 VPCostContext &Ctx) const override {
3748 // TODO: Compute accurate cost after retiring the legacy cost model.
3749 return 0;
3750 }
3751
3752protected:
3753#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3754 /// Print the recipe.
3755 void printRecipe(raw_ostream &O, const Twine &Indent,
3756 VPSlotTracker &SlotTracker) const override;
3757#endif
3758};
3759
3760 /// A recipe for converting the input value \p IV to the corresponding value
3761 /// of an IV with different start and step values, using Start + IV *
3762 /// Step.
3764 /// Kind of the induction.
3766 /// If not nullptr, the floating point induction binary operator. Must be set
3767 /// for floating point inductions.
3768 const FPMathOperator *FPBinOp;
3769
3770 /// Name to use for the generated IR instruction for the derived IV.
3771 std::string Name;
3772
3773public:
3775 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3776 const Twine &Name = "")
3778 IndDesc.getKind(),
3779 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3780 Start, CanonicalIV, Step, Name) {}
3781
3783 const FPMathOperator *FPBinOp, VPIRValue *Start,
3784 VPValue *IV, VPValue *Step, const Twine &Name = "")
3785 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3786 FPBinOp(FPBinOp), Name(Name.str()) {}
3787
3788 ~VPDerivedIVRecipe() override = default;
3789
3791 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3792 getStepValue());
3793 }
3794
3795 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3796
3797 /// Generate the transformed value of the induction at offset StartValue (1.
3798 /// operand) + IV (2. operand) * StepValue (3. operand).
3799 void execute(VPTransformState &State) override;
3800
3801 /// Return the cost of this VPDerivedIVRecipe.
3803 VPCostContext &Ctx) const override {
3804 // TODO: Compute accurate cost after retiring the legacy cost model.
3805 return 0;
3806 }
3807
/// Return the scalar type of this recipe, taken from the type of its start
/// value.
3808 Type *getScalarType() const { return getStartValue()->getType(); }
3809
/// Return the step value, which is operand 2 of {Start, IV, Step}.
3811 VPValue *getStepValue() const { return getOperand(2); }
3812
3813 /// Returns true if the recipe only uses the first lane of operand \p Op.
3814 bool usesFirstLaneOnly(const VPValue *Op) const override {
3816 "Op must be an operand of the recipe");
3817 return true;
3818 }
3819
3820protected:
3821#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3822 /// Print the recipe.
3823 void printRecipe(raw_ostream &O, const Twine &Indent,
3824 VPSlotTracker &SlotTracker) const override;
3825#endif
3826};
3827
3828/// A recipe for handling phi nodes of integer and floating-point inductions,
3829/// producing their scalar values.
3831 public VPUnrollPartAccessor<3> {
3832 Instruction::BinaryOps InductionOpcode;
3833
3834public:
3837 DebugLoc DL)
3838 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3839 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3840 InductionOpcode(Opcode) {}
3841
3843 VPValue *Step, VPValue *VF,
3846 IV, Step, VF, IndDesc.getInductionOpcode(),
3847 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3848 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3849 : FastMathFlags(),
3850 DL) {}
3851
3852 ~VPScalarIVStepsRecipe() override = default;
3853
3855 return new VPScalarIVStepsRecipe(
3856 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3858 getDebugLoc());
3859 }
3860
3861 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3862 /// this is only accurate after the VPlan has been unrolled.
3863 bool isPart0() const { return getUnrollPart(*this) == 0; }
3864
3865 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3866
3867 /// Generate the scalarized versions of the phi node as needed by their users.
3868 void execute(VPTransformState &State) override;
3869
3870 /// Return the cost of this VPScalarIVStepsRecipe.
3872 VPCostContext &Ctx) const override {
3873 // TODO: Compute accurate cost after retiring the legacy cost model.
3874 return 0;
3875 }
3876
3877 VPValue *getStepValue() const { return getOperand(1); }
3878
3879 /// Returns true if the recipe only uses the first lane of operand \p Op.
3880 bool usesFirstLaneOnly(const VPValue *Op) const override {
3882 "Op must be an operand of the recipe");
3883 return true;
3884 }
3885
3886protected:
3887#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3888 /// Print the recipe.
3889 void printRecipe(raw_ostream &O, const Twine &Indent,
3890 VPSlotTracker &SlotTracker) const override;
3891#endif
3892};
3893
3894/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3895/// types implementing VPPhiAccessors. Used by isa<> & co.
3897 static inline bool isPossible(const VPRecipeBase *f) {
3898 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3900 }
3901};
3902/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3903/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3904template <typename SrcTy>
3905struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3906
3908
3909 /// doCast is used by cast<>. Selects the concrete recipe class from
/// R->getVPDefID(); every ID without an explicit case below is cast to
/// VPHeaderPHIRecipe.
3910 static inline VPPhiAccessors *doCast(SrcTy R) {
// The lambda returns a pointer-to-const which the const_cast un-qualifies
// again. NOTE(review): presumably this lets a single body serve both const
// and non-const SrcTy -- confirm against the CastInfo specializations.
3911 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3912 switch (R->getVPDefID()) {
3913 case VPDef::VPInstructionSC:
3914 return cast<VPPhi>(R);
3915 case VPDef::VPIRInstructionSC:
3916 return cast<VPIRPhi>(R);
3917 case VPDef::VPWidenPHISC:
3918 return cast<VPWidenPHIRecipe>(R);
3919 default:
3920 return cast<VPHeaderPHIRecipe>(R);
3921 }
3922 }());
3923 }
3924
3925 /// Used by dyn_cast<>: returns nullptr when Self::isPossible rejects \p f and
3926 /// otherwise defers to doCast.
3927 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3928 return Self::isPossible(f) ? doCast(f) : nullptr;
3929 }
3931};
3932template <>
3935template <>
3938
3939/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3940/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3941namespace detail {
3942template <typename DstTy, typename RecipeBasePtrTy>
3943static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3944 switch (R->getVPDefID()) {
3945 case VPDef::VPInstructionSC:
3946 return cast<VPInstruction>(R);
3947 case VPDef::VPWidenSC:
3948 return cast<VPWidenRecipe>(R);
3949 case VPDef::VPWidenCastSC:
3950 return cast<VPWidenCastRecipe>(R);
3951 case VPDef::VPWidenIntrinsicSC:
3953 case VPDef::VPWidenCallSC:
3954 return cast<VPWidenCallRecipe>(R);
3955 case VPDef::VPReplicateSC:
3956 return cast<VPReplicateRecipe>(R);
3957 case VPDef::VPInterleaveSC:
3958 case VPDef::VPInterleaveEVLSC:
3959 return cast<VPInterleaveBase>(R);
3960 case VPDef::VPWidenLoadSC:
3961 case VPDef::VPWidenLoadEVLSC:
3962 case VPDef::VPWidenStoreSC:
3963 case VPDef::VPWidenStoreEVLSC:
3964 return cast<VPWidenMemoryRecipe>(R);
3965 default:
3966 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3967 }
3968}
3969} // namespace detail
3970
3971/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3972/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3973template <typename DstTy, typename SrcTy>
3974struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3975 static inline bool isPossible(SrcTy R) {
3976 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3977 // also handled in castToVPIRMetadata.
3982 R);
3983 }
3984
3985 using RetTy = DstTy *;
3986
3987 /// doCast is used by cast<>.
3988 static inline RetTy doCast(SrcTy R) {
3990 }
3991
3992 /// Used by dyn_cast<>: a null RetTy when isPossible rejects \p R, otherwise
3993 /// whatever doCast produces.
3994 static inline RetTy doCastIfPossible(SrcTy R) {
3995 return isPossible(R) ? doCast(R) : nullptr;
3996 }
3998};
3999template <>
4002template <>
4005
4006/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4007/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4008/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4009class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4010 friend class VPlan;
4011
4012 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4013 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4014 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4015 if (Recipe)
4016 appendRecipe(Recipe);
4017 }
4018
4019public:
4021
4022protected:
4023 /// The VPRecipes held in the order of output instructions to generate.
4025
4026 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4027 : VPBlockBase(BlockSC, Name.str()) {}
4028
4029public:
/// Empties the recipe list by popping one recipe at a time, last recipe first.
/// NOTE(review): presumably back-to-front so that users go away before the
/// definitions they use -- confirm.
4030 ~VPBasicBlock() override {
4031 while (!Recipes.empty())
4032 Recipes.pop_back();
4033 }
4034
4035 /// Instruction iterators...
4040
4041 //===--------------------------------------------------------------------===//
4042 /// Recipe iterator methods
4043 ///
4044 inline iterator begin() { return Recipes.begin(); }
4045 inline const_iterator begin() const { return Recipes.begin(); }
4046 inline iterator end() { return Recipes.end(); }
4047 inline const_iterator end() const { return Recipes.end(); }
4048
4049 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4050 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4051 inline reverse_iterator rend() { return Recipes.rend(); }
4052 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4053
4054 inline size_t size() const { return Recipes.size(); }
4055 inline bool empty() const { return Recipes.empty(); }
4056 inline const VPRecipeBase &front() const { return Recipes.front(); }
4057 inline VPRecipeBase &front() { return Recipes.front(); }
4058 inline const VPRecipeBase &back() const { return Recipes.back(); }
4059 inline VPRecipeBase &back() { return Recipes.back(); }
4060
4061 /// Returns a reference to the list of recipes.
4063
4064 /// Returns a pointer to a member of the recipe list.
4065 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4066 return &VPBasicBlock::Recipes;
4067 }
4068
4069 /// Method to support type inquiry through isa, cast, and dyn_cast.
4070 static inline bool classof(const VPBlockBase *V) {
4071 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4072 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4073 }
4074
/// Insert \p Recipe into this block's recipe list at position \p InsertPt and
/// make this block its parent. \p Recipe must be non-null and must not have a
/// parent yet.
4075 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4076 assert(Recipe && "No recipe to append.");
4077 assert(!Recipe->Parent && "Recipe already in VPlan");
4078 Recipe->Parent = this;
4079 Recipes.insert(InsertPt, Recipe);
4080 }
4081
4082 /// Augment the existing recipes of a VPBasicBlock with an additional
4083 /// \p Recipe as the last recipe.
4084 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4085
4086 /// The method which generates the output IR instructions that correspond to
4087 /// this VPBasicBlock, thereby "executing" the VPlan.
4088 void execute(VPTransformState *State) override;
4089
4090 /// Return the cost of this VPBasicBlock.
4091 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4092
4093 /// Return the position of the first non-phi node recipe in the block.
4094 iterator getFirstNonPhi();
4095
4096 /// Returns an iterator range over the PHI-like recipes in the block.
4100
4101 /// Split current block at \p SplitAt by inserting a new block between the
4102 /// current block and its successors and moving all recipes starting at
4103 /// SplitAt to the new block. Returns the new block.
4104 VPBasicBlock *splitAt(iterator SplitAt);
4105
4106 VPRegionBlock *getEnclosingLoopRegion();
4107 const VPRegionBlock *getEnclosingLoopRegion() const;
4108
4109#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4110 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4111 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4112 ///
4113 /// Note that the numbering is applied to the whole VPlan, so printing
4114 /// individual blocks is consistent with the whole VPlan printing.
4115 void print(raw_ostream &O, const Twine &Indent,
4116 VPSlotTracker &SlotTracker) const override;
4117 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4118#endif
4119
4120 /// If the block has multiple successors, return the branch recipe terminating
4121 /// the block. If there is no successor or only a single one, return nullptr.
4122 VPRecipeBase *getTerminator();
4123 const VPRecipeBase *getTerminator() const;
4124
4125 /// Returns true if the block is exiting its parent region.
4126 bool isExiting() const;
4127
4128 /// Clone the current block and its recipes, without updating the operands of
4129 /// the cloned recipes.
4130 VPBasicBlock *clone() override;
4131
4132 /// Returns the predecessor block at index \p Idx with the predecessors as per
4133 /// the corresponding plain CFG. If the block is an entry block to a region,
4134 /// the first predecessor is the single predecessor of a region, and the
4135 /// second predecessor is the exiting block of the region.
4136 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4137
4138protected:
4139 /// Execute the recipes in the IR basic block \p BB.
4140 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4141
4142 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4143 /// generated for this VPBB.
4144 void connectToPredecessors(VPTransformState &State);
4145
4146private:
4147 /// Create an IR BasicBlock to hold the output instructions generated by this
4148 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4149 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4150};
4151
4152inline const VPBasicBlock *
4154 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4155}
4156
4157/// A special type of VPBasicBlock that wraps an existing IR basic block.
4158/// Recipes of the block get added before the first non-phi instruction in the
4159/// wrapped block.
4160/// Note: within VPlan, VPIRBasicBlocks wrap the original loop's preheader, the
4161/// header of the scalar loop and the exit blocks of the scalar loop.
4162class VPIRBasicBlock : public VPBasicBlock {
4163 friend class VPlan;
4164
/// The wrapped IR basic block.
4165 BasicBlock *IRBB;
4166
4167 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks. The block is
/// named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4168 VPIRBasicBlock(BasicBlock *IRBB)
4169 : VPBasicBlock(VPIRBasicBlockSC,
4170 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4171 IRBB(IRBB) {}
4172
4173public:
4174 ~VPIRBasicBlock() override = default;
4175
/// Method to support type inquiry through isa, cast, and dyn_cast.
4176 static inline bool classof(const VPBlockBase *V) {
4177 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4178 }
4179
4180 /// The method which generates the output IR instructions that correspond to
4181 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4182 void execute(VPTransformState *State) override;
4183
4184 VPIRBasicBlock *clone() override;
4185
/// Return the wrapped IR basic block.
4186 BasicBlock *getIRBasicBlock() const { return IRBB; }
4187};
4188
4189/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4190/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4191/// A VPRegionBlock may indicate that its contents are to be replicated several
4192/// times. This is designed to support predicated scalarization, in which a
4193/// scalar if-then code structure needs to be generated VF * UF times. Having
4194/// this replication indicator helps to keep a single model for multiple
4195/// candidate VF's. The actual replication takes place only once the desired VF
4196/// and UF have been determined.
4197class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4198 friend class VPlan;
4199
4200 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4201 VPBlockBase *Entry;
4202
4203 /// Hold the Single Exiting block of the SESE region modelled by the
4204 /// VPRegionBlock.
4205 VPBlockBase *Exiting;
4206
4207 /// An indicator whether this region is to generate multiple replicated
4208 /// instances of output IR corresponding to its VPBlockBases.
4209 bool IsReplicator;
4210
4211 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4212 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4213 const std::string &Name = "", bool IsReplicator = false)
4214 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4215 IsReplicator(IsReplicator) {
4216 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4217 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4218 Entry->setParent(this);
4219 Exiting->setParent(this);
4220 }
4221 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4222 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4223 IsReplicator(IsReplicator) {}
4224
4225public:
4226 ~VPRegionBlock() override = default;
4227
4228 /// Method to support type inquiry through isa, cast, and dyn_cast.
4229 static inline bool classof(const VPBlockBase *V) {
4230 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4231 }
4232
4233 const VPBlockBase *getEntry() const { return Entry; }
4234 VPBlockBase *getEntry() { return Entry; }
4235
4236 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4237 /// EntryBlock must have no predecessors.
4238 void setEntry(VPBlockBase *EntryBlock) {
4239 assert(EntryBlock->getPredecessors().empty() &&
4240 "Entry block cannot have predecessors.");
4241 Entry = EntryBlock;
4242 EntryBlock->setParent(this);
4243 }
4244
4245 const VPBlockBase *getExiting() const { return Exiting; }
4246 VPBlockBase *getExiting() { return Exiting; }
4247
4248 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4249 /// ExitingBlock must have no successors.
4250 void setExiting(VPBlockBase *ExitingBlock) {
4251 assert(ExitingBlock->getSuccessors().empty() &&
4252 "Exit block cannot have successors.");
4253 Exiting = ExitingBlock;
4254 ExitingBlock->setParent(this);
4255 }
4256
4257 /// Returns the pre-header VPBasicBlock of the loop region.
4259 assert(!isReplicator() && "should only get pre-header of loop regions");
4260 return getSinglePredecessor()->getExitingBasicBlock();
4261 }
4262
4263 /// An indicator whether this region is to generate multiple replicated
4264 /// instances of output IR corresponding to its VPBlockBases.
4265 bool isReplicator() const { return IsReplicator; }
4266
4267 /// The method which generates the output IR instructions that correspond to
4268 /// this VPRegionBlock, thereby "executing" the VPlan.
4269 void execute(VPTransformState *State) override;
4270
4271 /// Return the cost of this region.
4272 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4273
4274#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4275 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4276 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4277 /// consecutive numbers.
4278 ///
4279 /// Note that the numbering is applied to the whole VPlan, so printing
4280 /// individual regions is consistent with the whole VPlan printing.
4281 void print(raw_ostream &O, const Twine &Indent,
4282 VPSlotTracker &SlotTracker) const override;
4283 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4284#endif
4285
4286 /// Clone all blocks in the single-entry single-exit region of the block and
4287 /// their recipes without updating the operands of the cloned recipes.
4288 VPRegionBlock *clone() override;
4289
4290 /// Remove the current region from its VPlan, connecting its predecessor to
4291 /// its entry, and its exiting block to its successor.
4292 void dissolveToCFGLoop();
4293
4294 /// Returns the canonical induction recipe of the region.
4296 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4297 if (EntryVPBB->empty()) {
4298 // VPlan native path. TODO: Unify both code paths.
4299 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4300 }
4301 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4302 }
4304 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4305 }
4306
4307 /// Return the type of the canonical IV for loop regions.
4308 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4309 const Type *getCanonicalIVType() const {
4310 return getCanonicalIV()->getScalarType();
4311 }
4312};
4313
4315 return getParent()->getParent();
4316}
4317
4319 return getParent()->getParent();
4320}
4321
4322/// VPlan models a candidate for vectorization, encoding various decisions taken
4323/// to produce efficient output IR, including which branches, basic-blocks and
4324/// output IR instructions to generate, and their cost. VPlan holds a
4325/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4326/// VPBasicBlock.
4327class VPlan {
4328 friend class VPlanPrinter;
4329 friend class VPSlotTracker;
4330
4331 /// VPBasicBlock corresponding to the original preheader. Used to place
4332 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4333 /// rest of VPlan execution.
4334 /// When this VPlan is used for the epilogue vector loop, the entry will be
4335 /// replaced by a new entry block created during skeleton creation.
4336 VPBasicBlock *Entry;
4337
4338 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4339 VPIRBasicBlock *ScalarHeader;
4340
4341 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4342 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4343 /// e.g. if the scalar epilogue always executes.
4345
4346 /// Holds the VFs applicable to this VPlan.
4348
4349 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4350 /// any UF.
4352
4353 /// Holds the name of the VPlan, for printing.
4354 std::string Name;
4355
4356 /// Represents the trip count of the original loop, for folding
4357 /// the tail.
4358 VPValue *TripCount = nullptr;
4359
4360 /// Represents the backedge taken count of the original loop, for folding
4361 /// the tail. It equals TripCount - 1.
4362 VPSymbolicValue *BackedgeTakenCount = nullptr;
4363
4364 /// Represents the vector trip count.
4365 VPSymbolicValue VectorTripCount;
4366
4367 /// Represents the vectorization factor of the loop.
4368 VPSymbolicValue VF;
4369
4370 /// Represents the loop-invariant VF * UF of the vector loop region.
4371 VPSymbolicValue VFxUF;
4372
4373 /// Contains all the external definitions created for this VPlan, as a mapping
4374 /// from IR Values to VPIRValues.
4376
4377 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4378 /// VPlan is destroyed.
4379 SmallVector<VPBlockBase *> CreatedBlocks;
4380
4381 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4382 /// wrapping the original header of the scalar loop.
4383 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4384 : Entry(Entry), ScalarHeader(ScalarHeader) {
4385 Entry->setPlan(this);
4386 assert(ScalarHeader->getNumSuccessors() == 0 &&
4387 "scalar header must be a leaf node");
4388 }
4389
4390public:
4391 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4392 /// original preheader and scalar header of \p L, to be used as entry and
4393 /// scalar header blocks of the new VPlan.
4394 VPlan(Loop *L);
4395
4396 /// Construct a VPlan with a new VPBasicBlock named "preheader" as entry and a
4397 /// VPIRBasicBlock wrapping \p ScalarHeaderBB. No trip count is set here.
4398 VPlan(BasicBlock *ScalarHeaderBB) {
4399 setEntry(createVPBasicBlock("preheader"));
4400 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4401 }
4402
4404
4406 Entry = VPBB;
4407 VPBB->setPlan(this);
4408 }
4409
4410 /// Generate the IR code for this VPlan.
4411 void execute(VPTransformState *State);
4412
4413 /// Return the cost of this plan.
4415
4416 VPBasicBlock *getEntry() { return Entry; }
4417 const VPBasicBlock *getEntry() const { return Entry; }
4418
4419 /// Returns the preheader of the vector loop region, if one exists, or null
4420 /// otherwise.
4422 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4423 return VectorRegion
4424 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4425 : nullptr;
4426 }
4427
4428 /// Returns the VPRegionBlock of the vector loop.
4431
4432 /// Returns the 'middle' block of the plan, that is the block that selects
4433 /// whether to execute the scalar tail loop or the exit block from the loop
4434 /// latch. If there is an early exit from the vector loop, the middle block
4435 /// conceptually has the early exit block as third successor, split across 2
4436 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4437 /// tail loop or the exit block. If the scalar tail loop or exit block are
4438 /// known to always execute, the middle block may branch directly to that
4439 /// block. This function cannot be called once the vector loop region has been
4440 /// removed.
4442 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4443 assert(
4444 LoopRegion &&
4445 "cannot call the function after vector loop region has been removed");
4446 // The middle block is always the last successor of the region.
4447 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4448 }
4449
4451 return const_cast<VPlan *>(this)->getMiddleBlock();
4452 }
4453
4454 /// Return the VPBasicBlock for the preheader of the scalar loop.
4456 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4457 }
4458
4459 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4460 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4461
4462 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4463 /// the original scalar loop.
4464 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4465
4466 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4467 /// exit block.
4469
4470 /// Returns true if \p VPBB is an exit block.
4471 bool isExitBlock(VPBlockBase *VPBB);
4472
4473 /// The trip count of the original loop.
4475 assert(TripCount && "trip count needs to be set before accessing it");
4476 return TripCount;
4477 }
4478
4479 /// Set the trip count assuming it is currently null; if it is not - use
4480 /// resetTripCount().
4481 void setTripCount(VPValue *NewTripCount) {
4482 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4483 TripCount = NewTripCount;
4484 }
4485
4486 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4487 /// the original trip count have been replaced.
4488 void resetTripCount(VPValue *NewTripCount) {
4489 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4490 "TripCount must be set when resetting");
4491 TripCount = NewTripCount;
4492 }
4493
4494 /// The backedge taken count of the original loop.
4496 if (!BackedgeTakenCount)
4497 BackedgeTakenCount = new VPSymbolicValue();
4498 return BackedgeTakenCount;
4499 }
4500 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4501
4502 /// The vector trip count.
4503 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4504
4505 /// Returns the symbolic value representing the VF of the vector loop region
/// (mutable and const overloads).
4506 VPValue &getVF() { return VF; }
4507 const VPValue &getVF() const { return VF; }
4508
4509 /// Returns VF * UF of the vector loop region.
4510 VPValue &getVFxUF() { return VFxUF; }
4511
4514 }
4515
4516 void addVF(ElementCount VF) { VFs.insert(VF); }
4517
4519 assert(hasVF(VF) && "Cannot set VF not already in plan");
4520 VFs.clear();
4521 VFs.insert(VF);
4522 }
4523
4524 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4525 bool hasScalableVF() const {
4526 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4527 }
4528
4529 /// Returns an iterator range over all VFs of the plan.
4532 return VFs;
4533 }
4534
4535 bool hasScalarVFOnly() const {
4536 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4537 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4538 "Plan with scalar VF should only have a single VF");
4539 return HasScalarVFOnly;
4540 }
4541
4542 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4543
4544 unsigned getUF() const {
4545 assert(UFs.size() == 1 && "Expected a single UF");
4546 return UFs[0];
4547 }
4548
4549 void setUF(unsigned UF) {
4550 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4551 UFs.clear();
4552 UFs.insert(UF);
4553 }
4554
4555 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4556 /// concrete UF.
4557 bool isUnrolled() const { return UFs.size() == 1; }
4558
4559 /// Return a string with the name of the plan and the applicable VFs and UFs.
4560 std::string getName() const;
4561
4562 void setName(const Twine &newName) { Name = newName.str(); }
4563
4564 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4565 /// yet) for \p V.
4567 assert(V && "Trying to get or add the VPIRValue of a null Value");
4568 auto [It, Inserted] = LiveIns.try_emplace(V);
4569 if (Inserted) {
4570 if (auto *CI = dyn_cast<ConstantInt>(V))
4571 It->second = new VPConstantInt(CI);
4572 else
4573 It->second = new VPIRValue(V);
4574 }
4575
4576 assert(isa<VPIRValue>(It->second) &&
4577 "Only VPIRValues should be in mapping");
4578 return It->second;
4579 }
4581 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4582 return getOrAddLiveIn(V->getValue());
4583 }
4584
4585 /// Return a VPIRValue wrapping i1 true.
4586 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4587
4588 /// Return a VPIRValue wrapping i1 false.
4589 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4590
4591 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4592 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4593 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4594 }
4595
4596 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4597 /// value.
4599 bool IsSigned = false) {
4600 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4601 }
4602
4603 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4605 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4606 }
4607
4608 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4609 /// otherwise.
4610 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4611
4612 /// Return the list of live-in VPValues available in the VPlan.
4613 auto getLiveIns() const { return LiveIns.values(); }
4614
4615#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4616 /// Print the live-ins of this VPlan to \p O.
4617 void printLiveIns(raw_ostream &O) const;
4618
4619 /// Print this VPlan to \p O.
4620 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4621
4622 /// Print this VPlan in DOT format to \p O.
4623 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4624
4625 /// Dump the plan to stderr (for debugging).
4626 LLVM_DUMP_METHOD void dump() const;
4627#endif
4628
4629 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4630 /// recipes to refer to the clones, and return it.
4632
4633 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4634 /// present. The returned block is owned by the VPlan and deleted once the
4635 /// VPlan is destroyed.
4637 VPRecipeBase *Recipe = nullptr) {
4638 auto *VPB = new VPBasicBlock(Name, Recipe);
4639 CreatedBlocks.push_back(VPB);
4640 return VPB;
4641 }
4642
/// Create a new loop region with \p Name. \p Entry and \p Exiting, when
/// non-null, become the region's entry and exiting blocks; each may be given
/// independently of the other. The returned block is owned by the VPlan and
/// deleted once the VPlan is destroyed.
4643 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4644 VPBlockBase *Entry = nullptr,
4645 VPBlockBase *Exiting = nullptr) {
// Start from an empty region and attach the blocks one at a time, so that a
// null \p Exiting is never dereferenced when only \p Entry is supplied.
4646 auto *VPB = new VPRegionBlock(Name);
4647 if (Entry)
4648 VPB->setEntry(Entry);
4649 if (Exiting)
4650 VPB->setExiting(Exiting);
4651 CreatedBlocks.push_back(VPB);
4652 return VPB;
4653 }
4654
4655 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4656 /// returned block is owned by the VPlan and deleted once the VPlan is
4657 /// destroyed.
4659 const std::string &Name = "") {
4660 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4661 CreatedBlocks.push_back(VPB);
4662 return VPB;
4663 }
4664
4665 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4666 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4667 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4669
4670 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4671 /// instructions in \p IRBB, except its terminator which is managed by the
4672 /// successors of the block in VPlan. The returned block is owned by the VPlan
4673 /// and deleted once the VPlan is destroyed.
4675
4676 /// Reports whether the loop modelled by this VPlan has an early exit: either
4677 /// a lone exit block is entered from more than one predecessor (the exit via
4678 /// the latch plus the early exit), or more than one exit block has any
4679 /// predecessors at all.
4680 bool hasEarlyExit() const {
4681 if (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1)
4682 return true;
4683 auto HasPreds = [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); };
4684 return count_if(ExitBlocks, HasPreds) > 1;
4685 }
4686
4687 /// Returns true if the scalar tail may execute after the vector loop. Note
4688 /// that this relies on unneeded branches to the scalar tail loop being
4689 /// removed.
4690 bool hasScalarTail() const {
4691 return !(!getScalarPreheader()->hasPredecessors() ||
4693 }
4694};
4695
4696#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4697inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4698 Plan.print(OS);
4699 return OS;
4700}
4701#endif
4702
4703} // end namespace llvm
4704
4705#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3660
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3654
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4009
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4037
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4084
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4039
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4036
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4062
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4020
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4026
iterator end()
Definition VPlan.h:4046
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4044
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4038
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4097
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:782
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:228
~VPBasicBlock() override
Definition VPlan.h:4030
const_reverse_iterator rbegin() const
Definition VPlan.h:4050
reverse_iterator rend()
Definition VPlan.h:4051
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4024
VPRecipeBase & back()
Definition VPlan.h:4059
const VPRecipeBase & front() const
Definition VPlan.h:4056
const_iterator begin() const
Definition VPlan.h:4045
VPRecipeBase & front()
Definition VPlan.h:4057
const VPRecipeBase & back() const
Definition VPlan.h:4058
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4075
bool empty() const
Definition VPlan.h:4055
const_iterator end() const
Definition VPlan.h:4047
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4070
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4065
reverse_iterator rbegin()
Definition VPlan.h:4049
friend class VPlan
Definition VPlan.h:4010
size_t size() const
Definition VPlan.h:4054
const_reverse_iterator rend() const
Definition VPlan.h:4052
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2587
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2592
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2582
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2603
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2612
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2569
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2564
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2598
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2578
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:657
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:220
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:192
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:212
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:178
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3084
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3068
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3092
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3065
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3595
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
Definition VPlan.h:3597
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3622
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3602
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3629
VPIRValue * getStartValue() const
Returns the start value of the canonical induction.
Definition VPlan.h:3616
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3619
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3610
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3636
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:356
VPDef(const unsigned char SC)
Definition VPlanValue.h:435
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1st operand) + IV (2nd operand) * StepValue (3rd operand).
VPIRValue * getStartValue() const
Definition VPlan.h:3810
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3802
VPValue * getStepValue() const
Definition VPlan.h:3811
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3774
Type * getScalarType() const
Definition VPlan.h:3808
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3790
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3814
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3782
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3710
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3691
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3697
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3703
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3686
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3570
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3575
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3561
const SCEV * getSCEV() const
Definition VPlan.h:3581
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3566
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3219
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3201
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3183
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3171
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3157
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3149
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3153
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3213
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3151
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2082
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2095
static bool classof(const VPValue *V)
Definition VPlan.h:2092
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2118
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2123
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2107
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2115
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2088
VPValue * getStartValue() const
Definition VPlan.h:2110
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2127
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2077
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1841
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1858
unsigned getOpcode() const
Definition VPlan.h:1854
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1835
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4162
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:457
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4186
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4176
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4163
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:482
Class to record and manage LLVM IR flags.
Definition VPlan.h:608
FastMathFlagsTy FMFs
Definition VPlan.h:695
ReductionFlagsTy ReductionFlags
Definition VPlan.h:697
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:765
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:756
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:748
WrapFlagsTy WrapFlags
Definition VPlan.h:689
CmpInst::Predicate CmpPredicate
Definition VPlan.h:688
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:742
GEPNoWrapFlags GEPFlags
Definition VPlan.h:693
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:882
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:932
TruncFlagsTy TruncFlags
Definition VPlan.h:690
CmpInst::Predicate getPredicate() const
Definition VPlan.h:859
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:890
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:769
ExactFlagsTy ExactFlags
Definition VPlan.h:692
bool hasNoSignedWrap() const
Definition VPlan.h:909
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:920
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:751
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:754
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:759
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:739
bool isNonNeg() const
Definition VPlan.h:892
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:874
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:877
DisjointFlagsTy DisjointFlags
Definition VPlan.h:691
unsigned AllFlags
Definition VPlan.h:698
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:865
bool hasNoUnsignedWrap() const
Definition VPlan.h:898
FCmpFlagsTy FCmpFlags
Definition VPlan.h:696
NonNegFlagsTy NonNegFlags
Definition VPlan.h:694
bool isReductionInLoop() const
Definition VPlan.h:938
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:779
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:816
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:762
RecurKind getRecurKind() const
Definition VPlan.h:926
VPIRFlags(Instruction &I)
Definition VPlan.h:704
Instruction & getInstruction() const
Definition VPlan.h:1512
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1520
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1499
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1526
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1514
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1487
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1032
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1068
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1040
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1052
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1326
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1367
static bool classof(const VPUser *R)
Definition VPlan.h:1352
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1334
Type * getResultType() const
Definition VPlan.h:1373
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1356
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1086
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1238
@ ExtractLastActive
Extracts the lane from the first operand corresponding to the last active (non-zero) lane in the mask...
Definition VPlan.h:1195
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1188
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1133
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1178
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1191
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1130
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1182
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1125
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1122
@ VScale
Returns the value for vscale.
Definition VPlan.h:1198
@ CanonicalIVIncrementForPart
Definition VPlan.h:1106
bool hasResult() const
Definition VPlan.h:1262
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1303
unsigned getOpcode() const
Definition VPlan.h:1246
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1306
friend class VPlanSlp
Definition VPlan.h:1087
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2699
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2705
static bool classof(const VPUser *U)
Definition VPlan.h:2681
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2647
Instruction * getInsertPos() const
Definition VPlan.h:2703
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2676
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2701
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2693
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2722
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2687
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2775
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2803
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2797
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2810
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2790
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2777
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2733
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2760
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2743
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2754
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2735
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1385
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1407
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1402
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4153
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1427
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1394
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1412
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1416
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3276
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3258
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3269
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3254
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4314
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:235
friend class VPDef
Definition VPlanValue.h:237
LLVM_ABI_FOR_TEST VPRecipeValue(VPDef *Def, Value *UV=nullptr)
Definition VPlan.cpp:139
friend class VPValue
Definition VPlanValue.h:236
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2960
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2939
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2963
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2950
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2525
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2511
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2490
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2504
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true if the phi is part of a multi-use reduction.
Definition VPlan.h:2537
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2519
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2528
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2542
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2479
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2534
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2522
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2826
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2835
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2902
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2871
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2886
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2913
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2915
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2898
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2849
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2900
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2856
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2904
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2911
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2906
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2865
static bool classof(const VPUser *U)
Definition VPlan.h:2876
static bool classof(const VPValue *VPV)
Definition VPlan.h:2881
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2920
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4197
const VPBlockBase * getEntry() const
Definition VPlan.h:4233
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4308
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4265
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4250
VPBlockBase * getExiting()
Definition VPlan.h:4246
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4295
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4238
const Type * getCanonicalIVType() const
Definition VPlan.h:4309
const VPBlockBase * getExiting() const
Definition VPlan.h:4245
VPBlockBase * getEntry()
Definition VPlan.h:4234
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4303
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4258
friend class VPlan
Definition VPlan.h:4198
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4229
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2982
bool isSingleScalar() const
Definition VPlan.h:3023
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2990
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3035
bool isPredicated() const
Definition VPlan.h:3025
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3004
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3028
unsigned getOpcode() const
Definition VPlan.h:3052
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3047
VPValue * getStepValue() const
Definition VPlan.h:3877
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3871
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3842
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3863
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3854
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3835
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3880
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:594
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:597
static bool classof(const VPUser *U)
Definition VPlan.h:586
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1020
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:253
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1428
operand_range operands()
Definition VPlanValue.h:321
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:297
unsigned getNumOperands() const
Definition VPlanValue.h:291
operand_iterator op_end()
Definition VPlanValue.h:319
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:292
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:272
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:315
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:314
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:47
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:133
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:119
friend class VPRecipeValue
Definition VPlanValue.h:53
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:175
unsigned getNumUsers() const
Definition VPlanValue.h:107
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1957
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1978
const VPValue * getVFValue() const
Definition VPlan.h:1953
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1971
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1964
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1942
Type * getSourceElementType() const
Definition VPlan.h:2012
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2014
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2021
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1999
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2037
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2028
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1775
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1782
const_operand_range args() const
Definition VPlan.h:1815
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1796
operand_range args()
Definition VPlan.h:1814
Function * getCalledScalarFunction() const
Definition VPlan.h:1810
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3746
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3733
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3728
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1625
Instruction::CastOps getOpcode() const
Definition VPlan.h:1661
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1664
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1633
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1646
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1905
Type * getSourceElementType() const
Definition VPlan.h:1910
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1913
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1897
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1883
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2207
static bool classof(const VPValue *V)
Definition VPlan.h:2158
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2177
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2192
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2170
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2185
PHINode * getPHINode() const
Definition VPlan.h:2187
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2146
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2173
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2190
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2199
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2153
const VPValue * getVFValue() const
Definition VPlan.h:2180
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2163
const VPValue * getStepValue() const
Definition VPlan.h:2174
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2268
const TruncInst * getTruncInst() const
Definition VPlan.h:2284
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2262
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2272
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2254
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2228
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2283
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2237
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2299
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2279
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2292
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1675
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1706
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1746
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1755
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1692
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1761
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1727
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1758
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1749
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3307
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3304
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3347
static bool classof(const VPUser *U)
Definition VPlan.h:3341
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3370
Instruction & Ingredient
Definition VPlan.h:3295
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3330
Instruction & getIngredient() const
Definition VPlan.h:3378
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3301
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3334
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3361
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3298
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3357
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3317
void setMask(VPValue *Mask)
Definition VPlan.h:3309
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3367
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3354
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3351
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2397
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2364
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2371
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2326
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2335
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2316
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1577
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1591
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1616
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1581
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1606
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4327
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4610
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1117
friend class VPSlotTracker
Definition VPlan.h:4329
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1093
bool hasVF(ElementCount VF) const
Definition VPlan.h:4524
LLVMContext & getContext() const
Definition VPlan.h:4512
VPBasicBlock * getEntry()
Definition VPlan.h:4416
void setName(const Twine &newName)
Definition VPlan.h:4562
bool hasScalableVF() const
Definition VPlan.h:4525
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4510
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4506
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4474
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4495
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4531
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:901
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:879
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4580
const VPValue & getVF() const
Definition VPlan.h:4507
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:909
const VPBasicBlock * getEntry() const
Definition VPlan.h:4417
friend class VPlanPrinter
Definition VPlan.h:4328
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4589
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4604
unsigned getUF() const
Definition VPlan.h:4544
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4658
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1228
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4613
bool hasUF(unsigned UF) const
Definition VPlan.h:4542
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4464
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4503
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4500
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4566
void setVF(ElementCount VF)
Definition VPlan.h:4518
bool isUnrolled() const
Returns true if the VPlan has already been unrolled.
Definition VPlan.h:4557
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1022
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4680
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1004
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4598
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4450
void setTripCount(VPValue *NewTripCount)
Set the trip count, assuming it is currently null; if it is not, use resetTripCount() instead.
Definition VPlan.h:4481
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4488
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4441
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4405
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4636
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1234
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4586
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4646
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1123
bool hasScalarVFOnly() const
Definition VPlan.h:4535
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4455
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:916
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1076
void addVF(ElementCount VF)
Definition VPlan.h:4516
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4460
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1038
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4421
void setUF(unsigned UF)
Definition VPlan.h:4549
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4690
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1164
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4398
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4592
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2497
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3943
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2451
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2009
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2449
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3974
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3988
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3993
static bool isPossible(SrcTy R)
Definition VPlan.h:3975
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3905
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3926
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3907
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3910
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3897
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2443
Possible variants of a reduction.
Definition VPlan.h:2441
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2446
unsigned VFScaleFactor
Definition VPlan.h:2447
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:204
Struct to hold various analyses needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2412
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2424
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2404
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1558
VPIRPhi(PHINode &PN)
Definition VPlan.h:1551
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1553
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1569
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:186
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:137
static bool classof(const VPUser *U)
Definition VPlan.h:1445
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1460
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1475
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1442
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1455
static bool classof(const VPValue *V)
Definition VPlan.h:1450
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:974
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:980
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:975
static bool classof(const VPValue *V)
Definition VPlan.h:999
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:1006
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:994
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:226
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3426
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3439
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3427
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3449
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3384
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3406
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3385
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3394
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3510
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3522
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3511
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3535
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3525
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3467
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3485
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3476
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3491
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3468