LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block. \p Predecessor must currently be in the predecessor list.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos != Predecessors.end() && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block. \p Successor must currently be in the successor list.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos != Successors.end() && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// Replace the predecessor \p Old with \p New in place (keeping its position), useful when
130 /// trying to replace an old block in the CFG with a new one. \p Old must be a predecessor of this block.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end() && "Old is not a predecessor of this block");
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// Replace the successor \p Old with \p New in place (keeping its position), useful when
140 /// trying to replace an old block in the CFG with a new one. \p Old must be a successor of this block.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end() && "Old is not a successor of this block");
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclasses of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPBlockBase to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
508// Helper macro to define common classof implementations for recipes.
509#define VP_CLASSOF_IMPL(VPDefID) \
510 static inline bool classof(const VPDef *D) { \
511 return D->getVPDefID() == VPDefID; \
512 } \
513 static inline bool classof(const VPValue *V) { \
514 auto *R = V->getDefiningRecipe(); \
515 return R && R->getVPDefID() == VPDefID; \
516 } \
517 static inline bool classof(const VPUser *U) { \
518 auto *R = dyn_cast<VPRecipeBase>(U); \
519 return R && R->getVPDefID() == VPDefID; \
520 } \
521 static inline bool classof(const VPRecipeBase *R) { \
522 return R->getVPDefID() == VPDefID; \
523 } \
524 static inline bool classof(const VPSingleDefRecipe *R) { \
525 return R->getVPDefID() == VPDefID; \
526 }
527
528 /// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
529 /// more output IR instructions that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPBlendSC:
561 case VPRecipeBase::VPPredInstPHISC:
562 case VPRecipeBase::VPCanonicalIVPHISC:
563 case VPRecipeBase::VPActiveLaneMaskPHISC:
564 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565 case VPRecipeBase::VPWidenPHISC:
566 case VPRecipeBase::VPWidenIntOrFpInductionSC:
567 case VPRecipeBase::VPWidenPointerInductionSC:
568 case VPRecipeBase::VPReductionPHISC:
569 return true;
570 case VPRecipeBase::VPBranchOnMaskSC:
571 case VPRecipeBase::VPInterleaveEVLSC:
572 case VPRecipeBase::VPInterleaveSC:
573 case VPRecipeBase::VPIRInstructionSC:
574 case VPRecipeBase::VPWidenLoadEVLSC:
575 case VPRecipeBase::VPWidenLoadSC:
576 case VPRecipeBase::VPWidenStoreEVLSC:
577 case VPRecipeBase::VPWidenStoreSC:
578 case VPRecipeBase::VPHistogramSC:
579 // TODO: Widened stores don't define a value, but widened loads do. Split
580 // the recipes to be able to make widened loads VPSingleDefRecipes.
581 return false;
582 }
583 llvm_unreachable("Unhandled VPDefID");
584 }
585
586 static inline bool classof(const VPUser *U) {
587 auto *R = dyn_cast<VPRecipeBase>(U);
588 return R && classof(R);
589 }
590
591 VPSingleDefRecipe *clone() override = 0;
592
593 /// Returns the underlying instruction.
600
601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
602 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
604#endif
605};
606
607/// Class to record and manage LLVM IR flags.
609 enum class OperationType : unsigned char {
610 Cmp,
611 FCmp,
612 OverflowingBinOp,
613 Trunc,
614 DisjointOp,
615 PossiblyExactOp,
616 GEPOp,
617 FPMathOp,
618 NonNegOp,
619 ReductionOp,
620 Other
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
649 struct ExactFlagsTy {
650 char IsExact : 1;
651 };
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
670 struct ReductionFlagsTy {
671 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
672 // additional kinds.
673 unsigned char Kind : 6;
674 // TODO: Derive order/in-loop from plan and remove here.
675 unsigned char IsOrdered : 1;
676 unsigned char IsInLoop : 1;
677 FastMathFlagsTy FMFs;
678
679 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
680 FastMathFlags FMFs)
681 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
682 IsInLoop(IsInLoop), FMFs(FMFs) {}
683 };
684
685 OperationType OpType;
686
687 union {
692 ExactFlagsTy ExactFlags;
695 FastMathFlagsTy FMFs;
696 FCmpFlagsTy FCmpFlags;
697 ReductionFlagsTy ReductionFlags;
698 unsigned AllFlags;
699 };
700
701public:
702 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
703
705 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
706 OpType = OperationType::FCmp;
707 FCmpFlags.Pred = FCmp->getPredicate();
708 FCmpFlags.FMFs = FCmp->getFastMathFlags();
709 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
710 OpType = OperationType::Cmp;
711 CmpPredicate = Op->getPredicate();
712 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
713 OpType = OperationType::DisjointOp;
714 DisjointFlags.IsDisjoint = Op->isDisjoint();
715 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
716 OpType = OperationType::OverflowingBinOp;
717 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
718 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
719 OpType = OperationType::Trunc;
720 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
721 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
722 OpType = OperationType::PossiblyExactOp;
723 ExactFlags.IsExact = Op->isExact();
724 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
725 OpType = OperationType::GEPOp;
726 GEPFlags = GEP->getNoWrapFlags();
727 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
728 OpType = OperationType::NonNegOp;
729 NonNegFlags.NonNeg = PNNI->hasNonNeg();
730 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
731 OpType = OperationType::FPMathOp;
732 FMFs = Op->getFastMathFlags();
733 } else {
734 OpType = OperationType::Other;
735 AllFlags = 0;
736 }
737 }
738
740 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
741
743 : OpType(OperationType::FCmp) {
744 FCmpFlags.Pred = Pred;
745 FCmpFlags.FMFs = FMFs;
746 }
747
749 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
750
752 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
753
754 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
755
757 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
758
760 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
761
763 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
764
765 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
766 : OpType(OperationType::ReductionOp),
767 ReductionFlags(Kind, IsOrdered, IsInLoop, FMFs) {}
768
770 OpType = Other.OpType;
771 AllFlags = Other.AllFlags;
772 }
773
774 /// Only keep flags also present in \p Other. \p Other must have the same
775 /// OpType as the current object.
776 void intersectFlags(const VPIRFlags &Other);
777
778 /// Drop all poison-generating flags.
780 // NOTE: This needs to be kept in-sync with
781 // Instruction::dropPoisonGeneratingFlags.
782 switch (OpType) {
783 case OperationType::OverflowingBinOp:
784 WrapFlags.HasNUW = false;
785 WrapFlags.HasNSW = false;
786 break;
787 case OperationType::Trunc:
788 TruncFlags.HasNUW = false;
789 TruncFlags.HasNSW = false;
790 break;
791 case OperationType::DisjointOp:
792 DisjointFlags.IsDisjoint = false;
793 break;
794 case OperationType::PossiblyExactOp:
795 ExactFlags.IsExact = false;
796 break;
797 case OperationType::GEPOp:
799 break;
800 case OperationType::FPMathOp:
801 case OperationType::FCmp:
802 case OperationType::ReductionOp:
803 getFMFsRef().NoNaNs = false;
804 getFMFsRef().NoInfs = false;
805 break;
806 case OperationType::NonNegOp:
807 NonNegFlags.NonNeg = false;
808 break;
809 case OperationType::Cmp:
810 case OperationType::Other:
811 break;
812 }
813 }
814
815 /// Apply the IR flags to \p I.
816 void applyFlags(Instruction &I) const {
817 switch (OpType) {
818 case OperationType::OverflowingBinOp:
819 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
820 I.setHasNoSignedWrap(WrapFlags.HasNSW);
821 break;
822 case OperationType::Trunc:
823 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
824 I.setHasNoSignedWrap(TruncFlags.HasNSW);
825 break;
826 case OperationType::DisjointOp:
827 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
828 break;
829 case OperationType::PossiblyExactOp:
830 I.setIsExact(ExactFlags.IsExact);
831 break;
832 case OperationType::GEPOp:
833 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
834 break;
835 case OperationType::FPMathOp:
836 case OperationType::FCmp: {
837 const FastMathFlagsTy &F = getFMFsRef();
838 I.setHasAllowReassoc(F.AllowReassoc);
839 I.setHasNoNaNs(F.NoNaNs);
840 I.setHasNoInfs(F.NoInfs);
841 I.setHasNoSignedZeros(F.NoSignedZeros);
842 I.setHasAllowReciprocal(F.AllowReciprocal);
843 I.setHasAllowContract(F.AllowContract);
844 I.setHasApproxFunc(F.ApproxFunc);
845 break;
846 }
847 case OperationType::NonNegOp:
848 I.setNonNeg(NonNegFlags.NonNeg);
849 break;
850 case OperationType::ReductionOp:
851 llvm_unreachable("reduction ops should not use applyFlags");
852 case OperationType::Cmp:
853 case OperationType::Other:
854 break;
855 }
856 }
857
859 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
860 "recipe doesn't have a compare predicate");
861 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
862 }
863
865 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
866 "recipe doesn't have a compare predicate");
867 if (OpType == OperationType::FCmp)
868 FCmpFlags.Pred = Pred;
869 else
870 CmpPredicate = Pred;
871 }
872
874
875 /// Returns true if the recipe has a comparison predicate.
876 bool hasPredicate() const {
877 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
878 }
879
880 /// Returns true if the recipe has fast-math flags.
881 bool hasFastMathFlags() const {
882 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
883 OpType == OperationType::ReductionOp;
884 }
885
887
888 /// Returns true if the recipe has non-negative flag.
889 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
890
891 bool isNonNeg() const {
892 assert(OpType == OperationType::NonNegOp &&
893 "recipe doesn't have a NNEG flag");
894 return NonNegFlags.NonNeg;
895 }
896
897 bool hasNoUnsignedWrap() const {
898 switch (OpType) {
899 case OperationType::OverflowingBinOp:
900 return WrapFlags.HasNUW;
901 case OperationType::Trunc:
902 return TruncFlags.HasNUW;
903 default:
904 llvm_unreachable("recipe doesn't have a NUW flag");
905 }
906 }
907
908 bool hasNoSignedWrap() const {
909 switch (OpType) {
910 case OperationType::OverflowingBinOp:
911 return WrapFlags.HasNSW;
912 case OperationType::Trunc:
913 return TruncFlags.HasNSW;
914 default:
915 llvm_unreachable("recipe doesn't have a NSW flag");
916 }
917 }
918
919 bool isDisjoint() const {
920 assert(OpType == OperationType::DisjointOp &&
921 "recipe cannot have a disjoint flag");
922 return DisjointFlags.IsDisjoint;
923 }
924
926 assert(OpType == OperationType::ReductionOp &&
927 "recipe doesn't have reduction flags");
928 return static_cast<RecurKind>(ReductionFlags.Kind);
929 }
930
931 bool isReductionOrdered() const {
932 assert(OpType == OperationType::ReductionOp &&
933 "recipe doesn't have reduction flags");
934 return ReductionFlags.IsOrdered;
935 }
936
937 bool isReductionInLoop() const {
938 assert(OpType == OperationType::ReductionOp &&
939 "recipe doesn't have reduction flags");
940 return ReductionFlags.IsInLoop;
941 }
942
943private:
944 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
945 FastMathFlagsTy &getFMFsRef() {
946 if (OpType == OperationType::FCmp)
947 return FCmpFlags.FMFs;
948 if (OpType == OperationType::ReductionOp)
949 return ReductionFlags.FMFs;
950 return FMFs;
951 }
952 const FastMathFlagsTy &getFMFsRef() const {
953 if (OpType == OperationType::FCmp)
954 return FCmpFlags.FMFs;
955 if (OpType == OperationType::ReductionOp)
956 return ReductionFlags.FMFs;
957 return FMFs;
958 }
959
960public:
961#if !defined(NDEBUG)
962 /// Returns true if the set flags are valid for \p Opcode.
963 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
964#endif
965
966#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
967 void printFlags(raw_ostream &O) const;
968#endif
969};
970
971/// A pure-virtual common base class for recipes defining a single VPValue and
972/// using IR flags.
974 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
975 const VPIRFlags &Flags,
977 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
978
979 static inline bool classof(const VPRecipeBase *R) {
980 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
981 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
982 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
983 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
984 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
985 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
986 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
987 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
988 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
989 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
990 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
991 }
992
993 static inline bool classof(const VPUser *U) {
994 auto *R = dyn_cast<VPRecipeBase>(U);
995 return R && classof(R);
996 }
997
998 static inline bool classof(const VPValue *V) {
999 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
1000 return R && classof(R);
1001 }
1002
1004
1005 static inline bool classof(const VPSingleDefRecipe *U) {
1006 auto *R = dyn_cast<VPRecipeBase>(U);
1007 return R && classof(R);
1008 }
1009
1010 void execute(VPTransformState &State) override = 0;
1011
1012 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1014 VPCostContext &Ctx) const;
1015};
1016
1017 /// Helper to access the operand that contains the unroll part for this recipe
1018 /// after unrolling.
/// NOTE(review): PartOpIdx is presumably the index of the operand that holds
/// the unroll part; the member definitions are not visible in this header, so
/// confirm against the implementation.
1019 template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
1020 protected:
1021 /// Return the VPValue operand containing the unroll part or null if there is
1022 /// no such operand.
1023 VPValue *getUnrollPartOperand(const VPUser &U) const;
1024
1025 /// Return the unroll part.
1026 unsigned getUnrollPart(const VPUser &U) const;
1027 };
1028
1029/// Helper to manage IR metadata for recipes. It filters out metadata that
1030/// cannot be propagated.
1033
1034public:
1035 VPIRMetadata() = default;
1036
1037 /// Adds metadata that can be preserved from the original instruction
1038 /// \p I.
1040
1041 /// Copy constructor for cloning.
1043
1045
1046 /// Add all metadata to \p I.
1047 void applyMetadata(Instruction &I) const;
1048
1049 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1050 /// already exists, it will be replaced. Otherwise, it will be added.
1051 void setMetadata(unsigned Kind, MDNode *Node) {
1052 auto It =
1053 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1054 return P.first == Kind;
1055 });
1056 if (It != Metadata.end())
1057 It->second = Node;
1058 else
1059 Metadata.emplace_back(Kind, Node);
1060 }
1061
1062 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1063 /// nodes that are common to both.
1064 void intersect(const VPIRMetadata &MD);
1065
1066 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1067 MDNode *getMetadata(unsigned Kind) const {
1068 auto It =
1069 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1070 return It != Metadata.end() ? It->second : nullptr;
1071 }
1072
1073#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1074 /// Print metadata with node IDs.
1075 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1076#endif
1077};
1078
1079/// This is a concrete Recipe that models a single VPlan-level instruction.
1080/// While as any Recipe it may generate a sequence of IR instructions when
1081/// executed, these instructions would always form a single-def expression as
1082/// the VPInstruction is also a single def-use vertex.
1084 public VPIRMetadata,
1085 public VPUnrollPartAccessor<1> {
1086 friend class VPlanSlp;
1087
1088public:
1089 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1090 enum {
1092 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1093 // values of a first-order recurrence.
1097 // Creates a mask where each lane is active (true) whilst the current
1098 // counter (first operand + index) is less than the second operand. i.e.
1099 // mask[i] = icmp ult (op0 + i), op1
1100 // The size of the mask returned is VF * Multiplier (UF, third op).
1104 // Increment the canonical IV separately for each unrolled part.
1106 // Abstract instruction that compares two values and branches. This is
1107 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1110 // Branch with 2 boolean condition operands and 3 successors. If condition
1111 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1112 // successor 1; otherwise branches to successor 2. Expanded after region
1113 // dissolution into: (1) an OR of the two conditions branching to
1114 // middle.split or successor 2, and (2) middle.split branching to successor
1115 // 0 or successor 1 based on condition 0.
1118 /// Given operands of (the same) struct type, creates a struct of fixed-
1119 /// width vectors each containing a struct field of all operands. The
1120 /// number of operands matches the element count of every vector.
1122 /// Creates a fixed-width vector containing all operands. The number of
1123 /// operands matches the vector element count.
1125 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1126 /// abstract VPInstruction whose single defined VPValue represents VF
1127 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1128 /// VPInstructions.
1130 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1131 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1134 // Extracts the last part of its operand. Removed during unrolling.
1136 // Extracts the last lane of its vector operand, per part.
1138 // Extracts the second-to-last lane from its operand or the second-to-last
1139 // part if it is scalar. In the latter case, the recipe will be removed
1140 // during unrolling.
1142 LogicalAnd, // Non-poison propagating logical And.
1143 // Add an offset in bytes (second operand) to a base pointer (first
1144 // operand). Only generates scalar values (either for the first lane only or
1145 // for all lanes, depending on its uses).
1147 // Add a vector offset in bytes (second operand) to a scalar base pointer
1148 // (first operand).
1150 // Returns a scalar boolean value, which is true if any lane of its
1151 // (boolean) vector operands is true. It produces the reduced value across
1152 // all unrolled iterations. Unrolling will add all copies of its original
1153 // operand as additional operands. AnyOf is poison-safe as all operands
1154 // will be frozen.
1156 // Calculates the first active lane index of the vector predicate operands.
1157 // It produces the lane index across all unrolled iterations. Unrolling will
1158 // add all copies of its original operand as additional operands.
1159 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1160 // result even with operands that are all zeroes.
1162 // Calculates the last active lane index of the vector predicate operands.
1163 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1164 // tail-folding to extract the correct live-out value from the last active
1165 // iteration. It produces the lane index across all unrolled iterations.
1166 // Unrolling will add all copies of its original operand as additional
1167 // operands.
1169 // Returns a reversed vector for the operand.
1171
1172 // The opcodes below are used for VPInstructionWithType.
1173 //
1174 /// Scale the first operand (vector step) by the second operand
1175 /// (scalar-step). Casts both operands to the result type if needed.
1177 /// Start vector for reductions with 3 operands: the original start value,
1178 /// the identity value for the reduction and an integer indicating the
1179 /// scaling factor.
1181 // Creates a step vector starting from 0 to VF with a step of 1.
1183 /// Extracts a single lane (first operand) from a set of vector operands.
1184 /// The lane specifies an index into a vector formed by combining all vector
1185 /// operands (all operands after the first one).
1187 /// Explicit user for the resume phi of the canonical induction in the main
1188 /// VPlan, used by the epilogue vector loop.
1190 /// Extracts the lane from the first operand corresponding to the last
1191 /// active (non-zero) lane in the mask (second operand), or if no lanes
1192 /// were active in the mask, returns the default value (third operand).
1194
1195 /// Returns the value for vscale.
1198 };
1199
1200 /// Returns true if this VPInstruction generates scalar values for all lanes.
1201 /// Most VPInstructions generate a single value per part, either vector or
1202 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1203 /// values per all lanes, stemming from an original ingredient. This method
1204 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1205 /// underlying ingredient.
1206 bool doesGeneratePerAllLanes() const;
1207
1208 /// Return the number of operands determined by the opcode of the
1209 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1210 /// directly by the opcode.
1211 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1212
1213private:
1214 typedef unsigned char OpcodeTy;
1215 OpcodeTy Opcode;
1216
1217 /// An optional name that can be used for the generated IR instruction.
1218 std::string Name;
1219
1220 /// Returns true if we can generate a scalar for the first lane only if
1221 /// needed.
1222 bool canGenerateScalarForFirstLane() const;
1223
1224 /// Utility methods serving execute(): generates a single vector instance of
1225 /// the modeled instruction. \returns the generated value. In some cases an
1226 /// existing value is returned rather than a generated one.
1227 Value *generate(VPTransformState &State);
1228
1229public:
1230 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1231 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1232 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1233
1234 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1235
1236 VPInstruction *clone() override {
1237 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1238 getDebugLoc(), Name);
1239 if (getUnderlyingValue())
1240 New->setUnderlyingValue(getUnderlyingInstr());
1241 return New;
1242 }
1243
1244 unsigned getOpcode() const { return Opcode; }
1245
1246 /// Generate the instruction.
1247 /// TODO: We currently execute only per-part unless a specific instance is
1248 /// provided.
1249 void execute(VPTransformState &State) override;
1250
1251 /// Return the cost of this VPInstruction.
1252 InstructionCost computeCost(ElementCount VF,
1253 VPCostContext &Ctx) const override;
1254
1255#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1256 /// Print the VPInstruction to dbgs() (for debugging).
1257 LLVM_DUMP_METHOD void dump() const;
1258#endif
1259
1260 bool hasResult() const {
1261 // CallInst may or may not have a result, depending on the called function.
1262 // Conservatively return calls have results for now.
1263 switch (getOpcode()) {
1264 case Instruction::Ret:
1265 case Instruction::Br:
1266 case Instruction::Store:
1267 case Instruction::Switch:
1268 case Instruction::IndirectBr:
1269 case Instruction::Resume:
1270 case Instruction::CatchRet:
1271 case Instruction::Unreachable:
1272 case Instruction::Fence:
1273 case Instruction::AtomicRMW:
1277 return false;
1278 default:
1279 return true;
1280 }
1281 }
1282
1283 /// Returns true if the underlying opcode may read from or write to memory.
1284 bool opcodeMayReadOrWriteFromMemory() const;
1285
1286 /// Returns true if the recipe only uses the first lane of operand \p Op.
1287 bool usesFirstLaneOnly(const VPValue *Op) const override;
1288
1289 /// Returns true if the recipe only uses the first part of operand \p Op.
1290 bool usesFirstPartOnly(const VPValue *Op) const override;
1291
1292 /// Returns true if this VPInstruction produces a scalar value from a vector,
1293 /// e.g. by performing a reduction or extracting a lane.
1294 bool isVectorToScalar() const;
1295
1296 /// Returns true if this VPInstruction's operands are single scalars and the
1297 /// result is also a single scalar.
1298 bool isSingleScalar() const;
1299
1300 /// Returns the symbolic name assigned to the VPInstruction.
1301 StringRef getName() const { return Name; }
1302
1303 /// Set the symbolic name for the VPInstruction.
1304 void setName(StringRef NewName) { Name = NewName.str(); }
1305
1306protected:
1307#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1308 /// Print the VPInstruction to \p O.
1309 void printRecipe(raw_ostream &O, const Twine &Indent,
1310 VPSlotTracker &SlotTracker) const override;
1311#endif
1312};
1313
1314/// A specialization of VPInstruction augmenting it with a dedicated result
1315/// type, to be used when the opcode and operands of the VPInstruction don't
1316/// directly determine the result type. Note that there is no separate VPDef ID
1317/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1318/// distinguished purely by the opcode.
1320 /// Scalar result type produced by the recipe.
1321 Type *ResultTy;
1322
1323public:
1325 Type *ResultTy, const VPIRFlags &Flags = {},
1326 const VPIRMetadata &Metadata = {},
1328 const Twine &Name = "")
1329 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1330 ResultTy(ResultTy) {}
1331
1332 static inline bool classof(const VPRecipeBase *R) {
1333 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1334 // type information.
1335 if (R->isScalarCast())
1336 return true;
1337 auto *VPI = dyn_cast<VPInstruction>(R);
1338 if (!VPI)
1339 return false;
1340 switch (VPI->getOpcode()) {
1344 return true;
1345 default:
1346 return false;
1347 }
1348 }
1349
1350 static inline bool classof(const VPUser *R) {
1352 }
1353
1354 VPInstruction *clone() override {
1355 auto *New =
1357 *this, *this, getDebugLoc(), getName());
1358 New->setUnderlyingValue(getUnderlyingValue());
1359 return New;
1360 }
1361
1362 void execute(VPTransformState &State) override;
1363
1364 /// Return the cost of this VPInstruction.
1366 VPCostContext &Ctx) const override {
1367 // TODO: Compute accurate cost after retiring the legacy cost model.
1368 return 0;
1369 }
1370
1371 Type *getResultType() const { return ResultTy; }
1372
1373protected:
1374#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1375 /// Print the recipe.
1376 void printRecipe(raw_ostream &O, const Twine &Indent,
1377 VPSlotTracker &SlotTracker) const override;
1378#endif
1379};
1380
1381/// Helper type to provide functions to access incoming values and blocks for
1382/// phi-like recipes.
1384protected:
1385 /// Return a VPRecipeBase* to the current object.
1386 virtual const VPRecipeBase *getAsRecipe() const = 0;
1387
1388public:
1389 virtual ~VPPhiAccessors() = default;
1390
1391 /// Returns the incoming VPValue with index \p Idx.
1392 VPValue *getIncomingValue(unsigned Idx) const {
1393 return getAsRecipe()->getOperand(Idx);
1394 }
1395
1396 /// Returns the incoming block with index \p Idx.
1397 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1398
1399 /// Returns the number of incoming values, also number of incoming blocks.
1400 virtual unsigned getNumIncoming() const {
1401 return getAsRecipe()->getNumOperands();
1402 }
1403
1404 /// Returns an iterator range over the incoming values.
1406 return make_range(getAsRecipe()->op_begin(),
1407 getAsRecipe()->op_begin() + getNumIncoming());
1408 }
1409
1411 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1412
1413 /// Returns an iterator range over the incoming blocks.
1415 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1416 return getIncomingBlock(Idx);
1417 };
1418 return map_range(index_range(0, getNumIncoming()), GetBlock);
1419 }
1420
1421 /// Returns an iterator range over pairs of incoming values and corresponding
1422 /// incoming blocks.
1428
1429 /// Removes the incoming value for \p IncomingBlock, which must be a
1430 /// predecessor.
1431 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1432
1433#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1434 /// Print the recipe.
1436#endif
1437};
1438
1440 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1441 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1442
1443 static inline bool classof(const VPUser *U) {
1444 auto *VPI = dyn_cast<VPInstruction>(U);
1445 return VPI && VPI->getOpcode() == Instruction::PHI;
1446 }
1447
1448 static inline bool classof(const VPValue *V) {
1449 auto *VPI = dyn_cast<VPInstruction>(V);
1450 return VPI && VPI->getOpcode() == Instruction::PHI;
1451 }
1452
1453 static inline bool classof(const VPSingleDefRecipe *SDR) {
1454 auto *VPI = dyn_cast<VPInstruction>(SDR);
1455 return VPI && VPI->getOpcode() == Instruction::PHI;
1456 }
1457
1458 VPPhi *clone() override {
1459 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1460 PhiR->setUnderlyingValue(getUnderlyingValue());
1461 return PhiR;
1462 }
1463
1464 void execute(VPTransformState &State) override;
1465
1466protected:
1467#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1468 /// Print the recipe.
1469 void printRecipe(raw_ostream &O, const Twine &Indent,
1470 VPSlotTracker &SlotTracker) const override;
1471#endif
1472
1473 const VPRecipeBase *getAsRecipe() const override { return this; }
1474};
1475
1476 /// A recipe to wrap an original IR instruction not to be modified during
1477 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1478 /// Except for PHIs, VPIRInstructions cannot have any operands.
1480 Instruction &I;
1481
1482protected:
1483 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1484 /// subclasses may need to be created, e.g. VPIRPhi.
1486 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1487
1488public:
1489 ~VPIRInstruction() override = default;
1490
1491 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1492 /// VPIRInstruction.
1494
1495 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1496
1498 auto *R = create(I);
1499 for (auto *Op : operands())
1500 R->addOperand(Op);
1501 return R;
1502 }
1503
1504 void execute(VPTransformState &State) override;
1505
1506 /// Return the cost of this VPIRInstruction.
1508 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1509
1510 Instruction &getInstruction() const { return I; }
1511
1512 bool usesScalars(const VPValue *Op) const override {
1514 "Op must be an operand of the recipe");
1515 return true;
1516 }
1517
1518 bool usesFirstPartOnly(const VPValue *Op) const override {
1520 "Op must be an operand of the recipe");
1521 return true;
1522 }
1523
1524 bool usesFirstLaneOnly(const VPValue *Op) const override {
1526 "Op must be an operand of the recipe");
1527 return true;
1528 }
1529
1530 /// Update the recipe's first operand to the last lane of the last part of the
1531 /// operand using \p Builder. Must only be used for VPIRInstructions with at
1532 /// least one operand wrapping a PHINode.
1534
1535protected:
1536#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1537 /// Print the recipe.
1538 void printRecipe(raw_ostream &O, const Twine &Indent,
1539 VPSlotTracker &SlotTracker) const override;
1540#endif
1541};
1542
1543/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1544/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1545/// allowed, and it is used to add a new incoming value for the single
1546/// predecessor VPBB.
1548 public VPPhiAccessors {
1550
1551 static inline bool classof(const VPRecipeBase *U) {
1552 auto *R = dyn_cast<VPIRInstruction>(U);
1553 return R && isa<PHINode>(R->getInstruction());
1554 }
1555
1557
1558 void execute(VPTransformState &State) override;
1559
1560protected:
1561#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1562 /// Print the recipe.
1563 void printRecipe(raw_ostream &O, const Twine &Indent,
1564 VPSlotTracker &SlotTracker) const override;
1565#endif
1566
1567 const VPRecipeBase *getAsRecipe() const override { return this; }
1568};
1569
1570/// VPWidenRecipe is a recipe for producing a widened instruction using the
1571/// opcode and operands of the recipe. This recipe covers most of the
1572/// traditional vectorization cases where each recipe transforms into a
1573/// vectorized version of itself.
1575 public VPIRMetadata {
1576 unsigned Opcode;
1577
1578public:
1580 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1581 DebugLoc DL = {})
1582 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1583 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1584 setUnderlyingValue(&I);
1585 }
1586
1587 ~VPWidenRecipe() override = default;
1588
1589 VPWidenRecipe *clone() override {
1590 return new VPWidenRecipe(*getUnderlyingInstr(), operands(), *this, *this,
1591 getDebugLoc());
1592 }
1593
1594 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1595
1596 /// Produce a widened instruction using the opcode and operands of the recipe,
1597 /// processing State.VF elements.
1598 void execute(VPTransformState &State) override;
1599
1600 /// Return the cost of this VPWidenRecipe.
1601 InstructionCost computeCost(ElementCount VF,
1602 VPCostContext &Ctx) const override;
1603
1604 unsigned getOpcode() const { return Opcode; }
1605
1606protected:
1607#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1608 /// Print the recipe.
1609 void printRecipe(raw_ostream &O, const Twine &Indent,
1610 VPSlotTracker &SlotTracker) const override;
1611#endif
1612
1613 /// Returns true if the recipe only uses the first lane of operand \p Op.
1614 bool usesFirstLaneOnly(const VPValue *Op) const override {
1616 "Op must be an operand of the recipe");
1617 return Opcode == Instruction::Select && Op == getOperand(0) &&
1618 Op->isDefinedOutsideLoopRegions();
1619 }
1620};
1621
1622/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1624 /// Cast instruction opcode.
1625 Instruction::CastOps Opcode;
1626
1627 /// Result type for the cast.
1628 Type *ResultTy;
1629
1630public:
1632 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1633 const VPIRMetadata &Metadata = {},
1635 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1636 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1637 assert(flagsValidForOpcode(Opcode) &&
1638 "Set flags not supported for the provided opcode");
1640 }
1641
1642 ~VPWidenCastRecipe() override = default;
1643
1645 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1647 *this, *this, getDebugLoc());
1648 }
1649
1650 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1651
1652 /// Produce widened copies of the cast.
1653 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1654
1655 /// Return the cost of this VPWidenCastRecipe.
1657 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1658
1659 Instruction::CastOps getOpcode() const { return Opcode; }
1660
1661 /// Returns the result type of the cast.
1662 Type *getResultType() const { return ResultTy; }
1663
1664protected:
1665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1666 /// Print the recipe.
1667 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1668 VPSlotTracker &SlotTracker) const override;
1669#endif
1670};
1671
1672/// A recipe for widening vector intrinsics.
1674 /// ID of the vector intrinsic to widen.
1675 Intrinsic::ID VectorIntrinsicID;
1676
1677 /// Scalar return type of the intrinsic.
1678 Type *ResultTy;
1679
1680 /// True if the intrinsic may read from memory.
1681 bool MayReadFromMemory;
1682
1683 /// True if the intrinsic may write to memory.
1684 bool MayWriteToMemory;
1685
1686 /// True if the intrinsic may have side-effects.
1687 bool MayHaveSideEffects;
1688
1689public:
1691 ArrayRef<VPValue *> CallArguments, Type *Ty,
1692 const VPIRFlags &Flags = {},
1693 const VPIRMetadata &MD = {},
1695 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1696 DL),
1697 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1698 MayReadFromMemory(CI.mayReadFromMemory()),
1699 MayWriteToMemory(CI.mayWriteToMemory()),
1700 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1701 setUnderlyingValue(&CI);
1702 }
1703
1705 ArrayRef<VPValue *> CallArguments, Type *Ty,
1706 const VPIRFlags &Flags = {},
1707 const VPIRMetadata &Metadata = {},
1709 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1710 DL),
1711 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1712 ResultTy(Ty) {
1713 LLVMContext &Ctx = Ty->getContext();
1714 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1715 MemoryEffects ME = Attrs.getMemoryEffects();
1716 MayReadFromMemory = !ME.onlyWritesMemory();
1717 MayWriteToMemory = !ME.onlyReadsMemory();
1718 MayHaveSideEffects = MayWriteToMemory ||
1719 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1720 !Attrs.hasAttribute(Attribute::WillReturn);
1721 }
1722
1723 ~VPWidenIntrinsicRecipe() override = default;
1724
1726 if (Value *CI = getUnderlyingValue())
1727 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1728 operands(), ResultTy, *this, *this,
1729 getDebugLoc());
1730 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1731 *this, *this, getDebugLoc());
1732 }
1733
1734 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1735
1736 /// Produce a widened version of the vector intrinsic.
1737 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1738
1739 /// Return the cost of this vector intrinsic.
1741 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1742
1743 /// Return the ID of the intrinsic.
1744 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1745
1746 /// Return the scalar return type of the intrinsic.
1747 Type *getResultType() const { return ResultTy; }
1748
1749 /// Return the name of the intrinsic as a string.
1751
1752 /// Returns true if the intrinsic may read from memory.
1753 bool mayReadFromMemory() const { return MayReadFromMemory; }
1754
1755 /// Returns true if the intrinsic may write to memory.
1756 bool mayWriteToMemory() const { return MayWriteToMemory; }
1757
1758 /// Returns true if the intrinsic may have side-effects.
1759 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1760
1761 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1762
1763protected:
1764#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1765 /// Print the recipe.
1766 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1767 VPSlotTracker &SlotTracker) const override;
1768#endif
1769};
1770
1771/// A recipe for widening Call instructions using library calls.
1773 public VPIRMetadata {
1774 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1775 /// between a given VF and the chosen vectorized variant, so there will be a
1776 /// different VPlan for each VF with a valid variant.
1777 Function *Variant;
1778
1779public:
1781 ArrayRef<VPValue *> CallArguments,
1782 const VPIRFlags &Flags = {},
1783 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1784 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1785 VPIRMetadata(Metadata), Variant(Variant) {
1786 setUnderlyingValue(UV);
1787 assert(
1788 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1789 "last operand must be the called function");
1790 }
1791
1792 ~VPWidenCallRecipe() override = default;
1793
1795 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1796 *this, *this, getDebugLoc());
1797 }
1798
1799 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1800
1801 /// Produce a widened version of the call instruction.
1802 void execute(VPTransformState &State) override;
1803
1804 /// Return the cost of this VPWidenCallRecipe.
1805 InstructionCost computeCost(ElementCount VF,
1806 VPCostContext &Ctx) const override;
1807
1811
1814
1815protected:
1816#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1817 /// Print the recipe.
1818 void printRecipe(raw_ostream &O, const Twine &Indent,
1819 VPSlotTracker &SlotTracker) const override;
1820#endif
1821};
1822
1823/// A recipe representing a sequence of load -> update -> store as part of
1824/// a histogram operation. This means there may be aliasing between vector
1825/// lanes, which is handled by the llvm.experimental.vector.histogram family
1826/// of intrinsics. The only update operations currently supported are
1827/// 'add' and 'sub' where the other term is loop-invariant.
1829 /// Opcode of the update operation, currently either add or sub.
1830 unsigned Opcode;
1831
1832public:
1833 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1835 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1836
1837 ~VPHistogramRecipe() override = default;
1838
1840 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1841 }
1842
1843 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1844
1845 /// Produce a vectorized histogram operation.
1846 void execute(VPTransformState &State) override;
1847
1848 /// Return the cost of this VPHistogramRecipe.
1850 VPCostContext &Ctx) const override;
1851
1852 unsigned getOpcode() const { return Opcode; }
1853
1854 /// Return the mask operand if one was provided, or a null pointer if all
1855 /// lanes should be executed unconditionally.
1856 VPValue *getMask() const {
1857 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1858 }
1859
1860protected:
1861#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1862 /// Print the recipe
1863 void printRecipe(raw_ostream &O, const Twine &Indent,
1864 VPSlotTracker &SlotTracker) const override;
1865#endif
1866};
1867
1868/// A recipe for handling GEP instructions.
1870 Type *SourceElementTy;
1871
1872 bool isPointerLoopInvariant() const {
1873 return getOperand(0)->isDefinedOutsideLoopRegions();
1874 }
1875
1876 bool isIndexLoopInvariant(unsigned I) const {
1877 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1878 }
1879
1880public:
1882 const VPIRFlags &Flags = {},
1884 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1885 SourceElementTy(GEP->getSourceElementType()) {
1886 setUnderlyingValue(GEP);
1888 (void)Metadata;
1890 assert(Metadata.empty() && "unexpected metadata on GEP");
1891 }
1892
1893 ~VPWidenGEPRecipe() override = default;
1894
1897 operands(), *this, getDebugLoc());
1898 }
1899
1900 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1901
1902 /// This recipe generates a GEP instruction.
1903 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1904
1905 /// Generate the gep nodes.
1906 void execute(VPTransformState &State) override;
1907
1908 Type *getSourceElementType() const { return SourceElementTy; }
1909
1910 /// Return the cost of this VPWidenGEPRecipe.
1912 VPCostContext &Ctx) const override {
1913 // TODO: Compute accurate cost after retiring the legacy cost model.
1914 return 0;
1915 }
1916
1917 /// Returns true if the recipe only uses the first lane of operand \p Op.
1918 bool usesFirstLaneOnly(const VPValue *Op) const override;
1919
1920protected:
1921#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1922 /// Print the recipe.
1923 void printRecipe(raw_ostream &O, const Twine &Indent,
1924 VPSlotTracker &SlotTracker) const override;
1925#endif
1926};
1927
1928/// A recipe to compute a pointer to the last element of each part of a widened
1929/// memory access for widened memory accesses of IndexedTy. Used for
1930/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1932 public VPUnrollPartAccessor<2> {
1933 Type *IndexedTy;
1934
1935 /// The constant stride of the pointer computed by this recipe, expressed in
1936 /// units of IndexedTy.
1937 int64_t Stride;
1938
1939public:
1941 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1942 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1943 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1944 IndexedTy(IndexedTy), Stride(Stride) {
1945 assert(Stride < 0 && "Stride must be negative");
1946 }
1947
1948 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1949
1951 const VPValue *getVFValue() const { return getOperand(1); }
1952
1953 void execute(VPTransformState &State) override;
1954
1955 bool usesFirstLaneOnly(const VPValue *Op) const override {
1957 "Op must be an operand of the recipe");
1958 return true;
1959 }
1960
1961 /// Return the cost of this VPVectorEndPointerRecipe.
1963 VPCostContext &Ctx) const override {
1964 // TODO: Compute accurate cost after retiring the legacy cost model.
1965 return 0;
1966 }
1967
1968 /// Returns true if the recipe only uses the first part of operand \p Op.
1969 bool usesFirstPartOnly(const VPValue *Op) const override {
1971 "Op must be an operand of the recipe");
1972 assert(getNumOperands() <= 2 && "must have at most two operands");
1973 return true;
1974 }
1975
1977 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1978 Stride, getGEPNoWrapFlags(),
1979 getDebugLoc());
1980 }
1981
1982protected:
1983#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1984 /// Print the recipe.
1985 void printRecipe(raw_ostream &O, const Twine &Indent,
1986 VPSlotTracker &SlotTracker) const override;
1987#endif
1988};
1989
1990/// A recipe to compute the pointers for widened memory accesses of \p
1991/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
1992/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
1994 Type *SourceElementTy;
1995
1996public:
1997 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1999 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, Ptr, GEPFlags, DL),
2000 SourceElementTy(SourceElementTy) {}
2001
2002 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
2003
2005 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2006 }
2007
2008 void execute(VPTransformState &State) override;
2009
2010 Type *getSourceElementType() const { return SourceElementTy; }
2011
2012 bool usesFirstLaneOnly(const VPValue *Op) const override {
2014 "Op must be an operand of the recipe");
2015 return true;
2016 }
2017
2018 /// Returns true if the recipe only uses the first part of operand \p Op.
2019 bool usesFirstPartOnly(const VPValue *Op) const override {
2021 "Op must be an operand of the recipe");
2022 assert(getNumOperands() <= 2 && "must have at most two operands");
2023 return true;
2024 }
2025
2027 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2029 if (auto *Off = getOffset())
2030 Clone->addOperand(Off);
2031 return Clone;
2032 }
2033
2034 /// Return the cost of this VPVectorPointerRecipe.
2036 VPCostContext &Ctx) const override {
2037 // TODO: Compute accurate cost after retiring the legacy cost model.
2038 return 0;
2039 }
2040
2041protected:
2042#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2043 /// Print the recipe.
2044 void printRecipe(raw_ostream &O, const Twine &Indent,
2045 VPSlotTracker &SlotTracker) const override;
2046#endif
2047};
2048
2049/// A pure virtual base class for all recipes modeling header phis, including
2050/// phis for first order recurrences, pointer inductions and reductions. The
2051/// start value is the first operand of the recipe and the incoming value from
2052/// the backedge is the second operand.
2053///
2054/// Inductions are modeled using the following sub-classes:
2055/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2056/// starting at a specified value (zero for the main vector loop, the resume
2057/// value for the epilogue vector loop) and stepping by 1. The induction
2058/// controls exiting of the vector loop by comparing against the vector trip
2059/// count. Produces a single scalar PHI for the induction value per
2060/// iteration.
2061/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2062/// floating point inductions with arbitrary start and step values. Produces
2063/// a vector PHI per-part.
2064/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2065/// value of an IV with different start and step values. Produces a single
2066/// scalar value per iteration
2067/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2068/// canonical or derived induction.
2069/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2070/// pointer induction. Produces either a vector PHI per-part or scalar values
2071/// per-lane based on the canonical induction.
2073 public VPPhiAccessors {
2074protected:
2075 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2076 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2077 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2078 UnderlyingInstr, DL) {}
2079
2080 const VPRecipeBase *getAsRecipe() const override { return this; }
2081
2082public:
2083 ~VPHeaderPHIRecipe() override = default;
2084
2085 /// Method to support type inquiry through isa, cast, and dyn_cast.
/// A recipe is treated as a header phi when its VPDef ID lies in the
/// inclusive range [VPDef::VPFirstHeaderPHISC, VPDef::VPLastHeaderPHISC].
2086 static inline bool classof(const VPRecipeBase *R) {
2087 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2088 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2089 }
2090 static inline bool classof(const VPValue *V) {
2091 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2092 }
/// Type inquiry on a VPSingleDefRecipe: upcasts \p R to VPRecipeBase and
/// defers to the isa<> check on the recipe base class.
2093 static inline bool classof(const VPSingleDefRecipe *R) {
2094 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2095 }
2096
2097 /// Generate the phi nodes.
2098 void execute(VPTransformState &State) override = 0;
2099
2100 /// Return the cost of this header phi recipe.
2102 VPCostContext &Ctx) const override;
2103
2104 /// Returns the start value of the phi, if one is set.
2106 return getNumOperands() == 0 ? nullptr : getOperand(0);
2107 }
2109 return getNumOperands() == 0 ? nullptr : getOperand(0);
2110 }
2111
2112 /// Update the start value of the recipe.
2114
2115 /// Returns the incoming value from the loop backedge.
2117 return getOperand(1);
2118 }
2119
2120 /// Update the incoming value from the loop backedge.
2122
2123 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2124 /// to be a recipe.
2126 return *getBackedgeValue()->getDefiningRecipe();
2127 }
2128
2129protected:
2130#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2131 /// Print the recipe.
2132 void printRecipe(raw_ostream &O, const Twine &Indent,
2133 VPSlotTracker &SlotTracker) const override = 0;
2134#endif
2135};
2136
2137/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2138/// VPWidenPointerInductionRecipe), providing shared functionality, including
2139/// retrieving the step value, induction descriptor and original phi node.
2141 const InductionDescriptor &IndDesc;
2142
2143public:
2144 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2145 VPValue *Step, const InductionDescriptor &IndDesc,
2146 DebugLoc DL)
2147 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2148 addOperand(Step);
2149 }
2150
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// exactly two recipe IDs: VPWidenIntOrFpInductionSC and
/// VPWidenPointerInductionSC.
2151 static inline bool classof(const VPRecipeBase *R) {
2152 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2153 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2154 }
2155
/// Type inquiry on a VPValue: returns false when \p V has no defining recipe,
/// otherwise defers to the VPRecipeBase overload.
2156 static inline bool classof(const VPValue *V) {
2157 auto *R = V->getDefiningRecipe();
2158 return R && classof(R);
2159 }
2160
/// Type inquiry on a VPSingleDefRecipe: upcasts \p R to VPRecipeBase so the
/// VPRecipeBase overload performs the ID check.
2161 static inline bool classof(const VPSingleDefRecipe *R) {
2162 return classof(static_cast<const VPRecipeBase *>(R));
2163 }
2164
2165 void execute(VPTransformState &State) override = 0;
2166
2167 /// Returns the start value of the induction.
2169
2170 /// Returns the step value of the induction.
2172 const VPValue *getStepValue() const { return getOperand(1); }
2173
2174 /// Update the step value of the recipe.
2175 void setStepValue(VPValue *V) { setOperand(1, V); }
2176
2178 const VPValue *getVFValue() const { return getOperand(2); }
2179
2180 /// Returns the number of incoming values, also number of incoming blocks.
2181 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2182 /// incoming value, its start value.
2183 unsigned getNumIncoming() const override { return 1; }
2184
2186
2187 /// Returns the induction descriptor for the recipe.
2188 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2189
2191 // TODO: All operands of base recipe must exist and be at same index in
2192 // derived recipe.
2194 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2195 }
2196
2198 // TODO: All operands of base recipe must exist and be at same index in
2199 // derived recipe.
2201 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2202 }
2203
2204 /// Returns true if the recipe only uses the first lane of operand \p Op.
2205 bool usesFirstLaneOnly(const VPValue *Op) const override {
2207 "Op must be an operand of the recipe");
2208 // The recipe creates its own wide start value, so it only requests the
2209 // first lane of the operand.
2210 // TODO: Remove once creating the start value is modeled separately.
2211 return Op == getStartValue() || Op == getStepValue();
2212 }
2213};
2214
2215/// A recipe for handling phi nodes of integer and floating-point inductions,
2216/// producing their vector values. This is an abstract recipe and must be
2217/// converted to concrete recipes before executing.
2219 public VPIRFlags {
2220 TruncInst *Trunc;
2221
2222 // If this recipe is unrolled it will have 2 additional operands.
// The constructors set up 3 operands (start, step and VF), so an operand
// count of 5 identifies the unrolled form.
2223 bool isUnrolled() const { return getNumOperands() == 5; }
2224
2225public:
2227 VPValue *VF, const InductionDescriptor &IndDesc,
2228 const VPIRFlags &Flags, DebugLoc DL)
2229 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2230 Step, IndDesc, DL),
2231 VPIRFlags(Flags), Trunc(nullptr) {
2232 addOperand(VF);
2233 }
2234
2236 VPValue *VF, const InductionDescriptor &IndDesc,
2237 TruncInst *Trunc, const VPIRFlags &Flags,
2238 DebugLoc DL)
2239 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2240 Step, IndDesc, DL),
2241 VPIRFlags(Flags), Trunc(Trunc) {
2242 addOperand(VF);
2244 (void)Metadata;
2245 if (Trunc)
2247 assert(Metadata.empty() && "unexpected metadata on Trunc");
2248 }
2249
2251
2257
2258 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2259
2260 void execute(VPTransformState &State) override {
2261 llvm_unreachable("cannot execute this recipe, should be expanded via "
2262 "expandVPWidenIntOrFpInductionRecipe");
2263 }
2264
2265 /// Returns the start value of the induction.
2267
2268 /// If the recipe has been unrolled, return the VPValue for the induction
2269 /// increment, otherwise return null.
2271 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2272 }
2273
2274 /// Returns the number of incoming values, also number of incoming blocks.
2275 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2276 /// incoming value, its start value.
2277 unsigned getNumIncoming() const override { return 1; }
2278
2279 /// Returns the TruncInst stored in this recipe at construction, or nullptr
2280 /// if the recipe was created without one.
2281 TruncInst *getTruncInst() { return Trunc; }
2282 const TruncInst *getTruncInst() const { return Trunc; }
2283
2284 /// Returns true if the induction is canonical, i.e. starting at 0 and
2285 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2286 /// same type as the canonical induction.
2287 bool isCanonical() const;
2288
2289 /// Returns the scalar type of the induction.
2291 return Trunc ? Trunc->getType() : getStartValue()->getType();
2292 }
2293
2294 /// Returns the VPValue representing the value of this induction at
2295 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2296 /// take place.
2298 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2299 }
2300
2301protected:
2302#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2303 /// Print the recipe.
2304 void printRecipe(raw_ostream &O, const Twine &Indent,
2305 VPSlotTracker &SlotTracker) const override;
2306#endif
2307};
2308
2310public:
2311 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2312 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2313 /// VF*UF.
2315 VPValue *NumUnrolledElems,
2316 const InductionDescriptor &IndDesc, DebugLoc DL)
2317 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2318 Step, IndDesc, DL) {
2319 addOperand(NumUnrolledElems);
2320 }
2321
2323
2329
2330 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2331
2332 /// Generate vector values for the pointer induction.
2333 void execute(VPTransformState &State) override {
2334 llvm_unreachable("cannot execute this recipe, should be expanded via "
2335 "expandVPWidenPointerInduction");
2336 };
2337
2338 /// Returns true if only scalar values will be generated.
2339 bool onlyScalarsGenerated(bool IsScalable);
2340
2341protected:
2342#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2343 /// Print the recipe.
2344 void printRecipe(raw_ostream &O, const Twine &Indent,
2345 VPSlotTracker &SlotTracker) const override;
2346#endif
2347};
2348
2349/// A recipe for widened phis. Incoming values are operands of the recipe and
2350/// their operand index corresponds to the incoming predecessor block. If the
2351/// recipe is placed in an entry block to a (non-replicate) region, it must have
2352/// exactly 2 incoming values, the first from the predecessor of the region and
2353/// the second from the exiting block of the region.
2355 public VPPhiAccessors {
2356 /// Name to use for the generated IR instruction for the widened phi.
2357 std::string Name;
2358
2359public:
2360 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2361 /// debug location \p DL.
2362 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2363 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2364 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2365 if (Start)
2366 addOperand(Start);
2367 }
2368
2371 getOperand(0), getDebugLoc(), Name);
2373 C->addOperand(Op);
2374 return C;
2375 }
2376
2377 ~VPWidenPHIRecipe() override = default;
2378
2379 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2380
2381 /// Generate the phi/select nodes.
2382 void execute(VPTransformState &State) override;
2383
2384 /// Return the cost of this VPWidenPHIRecipe.
2386 VPCostContext &Ctx) const override;
2387
2388protected:
2389#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2390 /// Print the recipe.
2391 void printRecipe(raw_ostream &O, const Twine &Indent,
2392 VPSlotTracker &SlotTracker) const override;
2393#endif
2394
2395 const VPRecipeBase *getAsRecipe() const override { return this; }
2396};
2397
2398/// A recipe for handling first-order recurrence phis. The start value is the
2399/// first operand of the recipe and the incoming value from the backedge is the
2400/// second operand.
2403 VPValue &BackedgeValue)
2404 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {
2405 addOperand(&BackedgeValue);
2406 }
2407
2408 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2409
2414
2415 void execute(VPTransformState &State) override;
2416
2417 /// Return the cost of this first-order recurrence phi recipe.
2419 VPCostContext &Ctx) const override;
2420
2421 /// Returns true if the recipe only uses the first lane of operand \p Op.
2422 bool usesFirstLaneOnly(const VPValue *Op) const override {
2424 "Op must be an operand of the recipe");
2425 return Op == getStartValue();
2426 }
2427
2428protected:
2429#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2430 /// Print the recipe.
2431 void printRecipe(raw_ostream &O, const Twine &Indent,
2432 VPSlotTracker &SlotTracker) const override;
2433#endif
2434};
2435
2436/// Possible variants of a reduction.
2437
2438/// This reduction is ordered and in-loop.
2439struct RdxOrdered {};
2440/// This reduction is in-loop.
2441struct RdxInLoop {};
2442/// This reduction is unordered with the partial result scaled down by some
2443/// factor.
2446};
2447using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2448
/// Build the ReductionStyle matching the given properties.
/// \p Ordered takes precedence and yields RdxOrdered; it asserts that
/// \p InLoop is also set. Otherwise \p InLoop yields RdxInLoop. Otherwise the
/// result is RdxUnordered carrying \p ScaleFactor as its VFScaleFactor.
/// \p ScaleFactor is not used for the ordered and in-loop styles.
2449inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2450 unsigned ScaleFactor) {
2451 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2452 if (Ordered)
2453 return RdxOrdered{};
2454 if (InLoop)
2455 return RdxInLoop{};
2456 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2457}
2458
2459/// A recipe for handling reduction phis. The start value is the first operand
2460/// of the recipe and the incoming value from the backedge is the second
2461/// operand.
2463 public VPUnrollPartAccessor<2> {
2464 /// The recurrence kind of the reduction.
2465 const RecurKind Kind;
2466
2467 ReductionStyle Style;
2468
2469 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2470 /// patterns for argmin/argmax).
2471 /// TODO: Also support cases where the phi itself has a single use, but its
2472 /// compare has multiple uses.
2473 bool HasUsesOutsideReductionChain;
2474
2475public:
2476 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2478 VPValue &BackedgeValue, ReductionStyle Style,
2479 bool HasUsesOutsideReductionChain = false)
2480 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2481 Style(Style),
2482 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2483 addOperand(&BackedgeValue);
2484 }
2485
2486 ~VPReductionPHIRecipe() override = default;
2487
2489 return new VPReductionPHIRecipe(
2491 *getOperand(0), *getBackedgeValue(), Style,
2492 HasUsesOutsideReductionChain);
2493 }
2494
2495 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2496
2497 /// Generate the phi/select nodes.
2498 void execute(VPTransformState &State) override;
2499
2500 /// Get the factor that the VF of this recipe's output should be scaled by, or
2501 /// 1 if it isn't scaled.
/// Only the RdxUnordered style carries a factor; the other styles yield 1.
2502 unsigned getVFScaleFactor() const {
2503 auto *Partial = std::get_if<RdxUnordered>(&Style);
2504 return Partial ? Partial->VFScaleFactor : 1;
2505 }
2506
2507 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2508 /// > 1.
/// This replaces the current Style with RdxUnordered holding \p ScaleFactor,
/// whatever style the phi had before.
2509 void setVFScaleFactor(unsigned ScaleFactor) {
2510 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2511 Style = RdxUnordered{ScaleFactor};
2512 }
2513
2514 /// Returns the number of incoming values, also number of incoming blocks.
2515 /// A reduction phi has two: the start value (first operand) and the incoming
2516 /// value from the backedge (second operand).
2517 unsigned getNumIncoming() const override { return 2; }
2518
2519 /// Returns the recurrence kind of the reduction.
2520 RecurKind getRecurrenceKind() const { return Kind; }
2521
2522 /// Returns true, if the phi is part of an ordered reduction.
2523 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2524
2525 /// Returns true if the phi is part of an in-loop reduction.
2526 bool isInLoop() const {
2527 return std::holds_alternative<RdxInLoop>(Style) ||
2528 std::holds_alternative<RdxOrdered>(Style);
2529 }
2530
2531 /// Returns true if the reduction outputs a vector with a scaled down VF.
2532 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2533
2534 /// Returns true, if the phi is part of a multi-use reduction.
2536 return HasUsesOutsideReductionChain;
2537 }
2538
2539 /// Returns true if the recipe only uses the first lane of operand \p Op.
2540 bool usesFirstLaneOnly(const VPValue *Op) const override {
2542 "Op must be an operand of the recipe");
2543 return isOrdered() || isInLoop();
2544 }
2545
2546protected:
2547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2548 /// Print the recipe.
2549 void printRecipe(raw_ostream &O, const Twine &Indent,
2550 VPSlotTracker &SlotTracker) const override;
2551#endif
2552};
2553
2554/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2555/// instructions.
2557public:
2558 /// The blend operation is a User of the incoming values and of their
2559 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2560 /// be omitted (implied by passing an odd number of operands) in which case
2561 /// all other incoming values are merged into it.
2563 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2564 assert(Operands.size() >= 2 && "Expected at least two operands!");
2565 }
2566
2571
2572 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2573
2574 /// A normalized blend is one that has an odd number of operands, whereby the
2575 /// first operand does not have an associated mask.
2576 bool isNormalized() const { return getNumOperands() % 2; }
2577
2578 /// Return the number of incoming values, taking into account when normalized
2579 /// the first incoming value will have no mask.
/// Adding isNormalized() rounds the odd operand count of a normalized blend
/// up before halving, so the unmasked first value is still counted.
2580 unsigned getNumIncomingValues() const {
2581 return (getNumOperands() + isNormalized()) / 2;
2582 }
2583
2584 /// Return incoming value number \p Idx.
/// Value 0 is always operand 0. Later values sit at operand Idx * 2 when
/// every value has a mask, and one slot earlier when the blend is normalized
/// (the first value's mask is omitted).
2585 VPValue *getIncomingValue(unsigned Idx) const {
2586 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2587 }
2588
2589 /// Return mask number \p Idx.
/// Asserts that \p Idx is not 0 for a normalized blend, whose first value
/// has no mask. Mask 0 is operand 1; later masks sit at operand Idx * 2 + 1,
/// or at Idx * 2 when the blend is normalized.
2590 VPValue *getMask(unsigned Idx) const {
2591 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2592 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2593 }
2594
2595 /// Set mask number \p Idx to \p V.
/// Uses the same operand indexing as getMask(): operand 1 for mask 0,
/// otherwise operand Idx * 2 + 1, or Idx * 2 when the blend is normalized.
/// Asserts that \p Idx is not 0 for a normalized blend.
2596 void setMask(unsigned Idx, VPValue *V) {
2597 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2598 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2599 }
2600
2601 void execute(VPTransformState &State) override {
2602 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2603 }
2604
2605 /// Return the cost of this VPBlendRecipe.
2606 InstructionCost computeCost(ElementCount VF,
2607 VPCostContext &Ctx) const override;
2608
2609 /// Returns true if the recipe only uses the first lane of operand \p Op.
2610 bool usesFirstLaneOnly(const VPValue *Op) const override {
2612 "Op must be an operand of the recipe");
2613 // Recursing through Blend recipes only; this must terminate at header phis
2614 // at the latest.
2615 return all_of(users(),
2616 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2617 }
2618
2619protected:
2620#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2621 /// Print the recipe.
2622 void printRecipe(raw_ostream &O, const Twine &Indent,
2623 VPSlotTracker &SlotTracker) const override;
2624#endif
2625};
2626
2627/// A common base class for interleaved memory operations.
2628/// An Interleaved memory operation is a memory access method that combines
2629/// multiple strided loads/stores into a single wide load/store with shuffles.
2630/// The first operand is the start address. The optional operands are, in order,
2631/// the stored values and the mask.
2633 public VPIRMetadata {
2635
2636 /// Indicates if the interleave group is in a conditional block and requires a
2637 /// mask.
2638 bool HasMask = false;
2639
2640 /// Indicates if gaps between members of the group need to be masked out or if
2641 /// unused gaps can be loaded speculatively.
2642 bool NeedsMaskForGaps = false;
2643
2644protected:
2645 VPInterleaveBase(const unsigned char SC,
2647 ArrayRef<VPValue *> Operands,
2648 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2649 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2650 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2651 NeedsMaskForGaps(NeedsMaskForGaps) {
2652 // TODO: extend the masked interleaved-group support to reversed access.
2653 assert((!Mask || !IG->isReverse()) &&
2654 "Reversed masked interleave-group not supported.");
2655 if (StoredValues.empty()) {
2656 for (unsigned I = 0; I < IG->getFactor(); ++I)
2657 if (Instruction *Inst = IG->getMember(I)) {
2658 assert(!Inst->getType()->isVoidTy() && "must have result");
2659 new VPRecipeValue(this, Inst);
2660 }
2661 } else {
2662 for (auto *SV : StoredValues)
2663 addOperand(SV);
2664 }
2665 if (Mask) {
2666 HasMask = true;
2667 addOperand(Mask);
2668 }
2669 }
2670
2671public:
2672 VPInterleaveBase *clone() override = 0;
2673
2674 static inline bool classof(const VPRecipeBase *R) {
2675 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2676 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2677 }
2678
2679 static inline bool classof(const VPUser *U) {
2680 auto *R = dyn_cast<VPRecipeBase>(U);
2681 return R && classof(R);
2682 }
2683
2684 /// Return the address accessed by this recipe.
2685 VPValue *getAddr() const {
2686 return getOperand(0); // Address is the 1st, mandatory operand.
2687 }
2688
2689 /// Return the mask used by this recipe. Note that a full mask is represented
2690 /// by a nullptr.
2691 VPValue *getMask() const {
2692 // Mask is optional and the last operand.
2693 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2694 }
2695
2696 /// Return true if the access needs a mask because of the gaps.
2697 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2698
2700
2701 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2702
2703 void execute(VPTransformState &State) override {
2704 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2705 }
2706
2707 /// Return the cost of this recipe.
2708 InstructionCost computeCost(ElementCount VF,
2709 VPCostContext &Ctx) const override;
2710
2711 /// Returns true if the recipe only uses the first lane of operand \p Op.
2712 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2713
2714 /// Returns the number of stored operands of this interleave group. Returns 0
2715 /// for load interleave groups.
2716 virtual unsigned getNumStoreOperands() const = 0;
2717
2718 /// Return the VPValues stored by this interleave group. If it is a load
2719 /// interleave group, return an empty ArrayRef.
2721 return ArrayRef<VPValue *>(op_end() -
2722 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2724 }
2725};
2726
2727/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2728/// or stores into one wide load/store and shuffles. The first operand of a
2729/// VPInterleave recipe is the address, followed by the stored values, followed
2730/// by an optional mask.
2732public:
2734 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2735 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2736 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2737 NeedsMaskForGaps, MD, DL) {}
2738
2739 ~VPInterleaveRecipe() override = default;
2740
2744 needsMaskForGaps(), *this, getDebugLoc());
2745 }
2746
2747 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2748
2749 /// Generate the wide load or store, and shuffles.
2750 void execute(VPTransformState &State) override;
2751
2752 bool usesFirstLaneOnly(const VPValue *Op) const override {
2754 "Op must be an operand of the recipe");
2755 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2756 }
2757
/// Number of stored-value operands: every operand except the address and,
/// when a mask is present, the mask.
2758 unsigned getNumStoreOperands() const override {
2759 return getNumOperands() - (getMask() ? 2 : 1);
2760 }
2761
2762protected:
2763#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2764 /// Print the recipe.
2765 void printRecipe(raw_ostream &O, const Twine &Indent,
2766 VPSlotTracker &SlotTracker) const override;
2767#endif
2768};
2769
2770/// A recipe for interleaved memory operations with vector-predication
2771/// intrinsics. The first operand is the address, the second operand is the
2772/// explicit vector length. Stored values and mask are optional operands.
2774public:
2776 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2777 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2778 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2779 R.getDebugLoc()) {
2780 assert(!getInterleaveGroup()->isReverse() &&
2781 "Reversed interleave-group with tail folding is not supported.");
2782 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2783 "supported for scalable vector.");
2784 }
2785
2786 ~VPInterleaveEVLRecipe() override = default;
2787
2789 llvm_unreachable("cloning not implemented yet");
2790 }
2791
2792 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2793
2794 /// The VPValue of the explicit vector length.
2795 VPValue *getEVL() const { return getOperand(1); }
2796
2797 /// Generate the wide load or store, and shuffles.
2798 void execute(VPTransformState &State) override;
2799
2800 /// The recipe only uses the first lane of the address, and EVL operand.
2801 bool usesFirstLaneOnly(const VPValue *Op) const override {
2803 "Op must be an operand of the recipe");
2804 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2805 Op == getEVL();
2806 }
2807
/// Number of stored-value operands: every operand except the address, the
/// EVL and, when a mask is present, the mask.
2808 unsigned getNumStoreOperands() const override {
2809 return getNumOperands() - (getMask() ? 3 : 2);
2810 }
2811
2812protected:
2813#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2814 /// Print the recipe.
2815 void printRecipe(raw_ostream &O, const Twine &Indent,
2816 VPSlotTracker &SlotTracker) const override;
2817#endif
2818};
2819
2820/// A recipe to represent inloop, ordered or partial reduction operations. It
2821/// performs a reduction on a vector operand into a scalar (vector in the case
2822/// of a partial reduction) value, and adds the result to a chain. The Operands
2823/// are {ChainOp, VecOp, [Condition]}.
2825
2826 /// The recurrence kind for the reduction in question.
2827 RecurKind RdxKind;
2828 /// Whether the reduction is conditional.
2829 bool IsConditional = false;
2830 ReductionStyle Style;
2831
2832protected:
2833 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2835 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2836 ReductionStyle Style, DebugLoc DL)
2837 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2838 Style(Style) {
2839 if (CondOp) {
2840 IsConditional = true;
2841 addOperand(CondOp);
2842 }
2844 }
2845
2846public:
2848 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2850 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2851 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2852 DL) {}
2853
2855 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2857 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2858 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2859 DL) {}
2860
2861 ~VPReductionRecipe() override = default;
2862
2864 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2866 getCondOp(), Style, getDebugLoc());
2867 }
2868
2869 static inline bool classof(const VPRecipeBase *R) {
2870 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2871 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2872 }
2873
2874 static inline bool classof(const VPUser *U) {
2875 auto *R = dyn_cast<VPRecipeBase>(U);
2876 return R && classof(R);
2877 }
2878
2879 static inline bool classof(const VPValue *VPV) {
2880 const VPRecipeBase *R = VPV->getDefiningRecipe();
2881 return R && classof(R);
2882 }
2883
2884 static inline bool classof(const VPSingleDefRecipe *R) {
2885 return classof(static_cast<const VPRecipeBase *>(R));
2886 }
2887
2888 /// Generate the reduction in the loop.
2889 void execute(VPTransformState &State) override;
2890
2891 /// Return the cost of VPReductionRecipe.
2892 InstructionCost computeCost(ElementCount VF,
2893 VPCostContext &Ctx) const override;
2894
2895 /// Return the recurrence kind for the in-loop reduction.
2896 RecurKind getRecurrenceKind() const { return RdxKind; }
2897 /// Return true if the in-loop reduction is ordered.
2898 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2899 /// Return true if the in-loop reduction is conditional.
2900 bool isConditional() const { return IsConditional; };
2901 /// Returns true if the reduction outputs a vector with a scaled down VF.
2902 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2903 /// Returns true if the reduction is in-loop.
2904 bool isInLoop() const {
2905 return std::holds_alternative<RdxInLoop>(Style) ||
2906 std::holds_alternative<RdxOrdered>(Style);
2907 }
2908 /// The VPValue of the scalar Chain being accumulated.
2909 VPValue *getChainOp() const { return getOperand(0); }
2910 /// The VPValue of the vector value to be reduced.
2911 VPValue *getVecOp() const { return getOperand(1); }
2912 /// The VPValue of the condition for the block.
2914 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2915 }
2916 /// Get the factor that the VF of this recipe's output should be scaled by, or
2917 /// 1 if it isn't scaled.
2918 unsigned getVFScaleFactor() const {
2919 auto *Partial = std::get_if<RdxUnordered>(&Style);
2920 return Partial ? Partial->VFScaleFactor : 1;
2921 }
2922
2923protected:
2924#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2925 /// Print the recipe.
2926 void printRecipe(raw_ostream &O, const Twine &Indent,
2927 VPSlotTracker &SlotTracker) const override;
2928#endif
2929};
2930
2931/// A recipe to represent inloop reduction operations with vector-predication
2932/// intrinsics, performing a reduction on a vector operand with the explicit
2933/// vector length (EVL) into a scalar value, and adding the result to a chain.
2934/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2936public:
2940 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2941 R.getFastMathFlags(),
2943 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2944 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2945
2946 ~VPReductionEVLRecipe() override = default;
2947
2949 llvm_unreachable("cloning not implemented yet");
2950 }
2951
2952 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2953
2954 /// Generate the reduction in the loop
2955 void execute(VPTransformState &State) override;
2956
2957 /// The VPValue of the explicit vector length.
2958 VPValue *getEVL() const { return getOperand(2); }
2959
2960 /// Returns true if the recipe only uses the first lane of operand \p Op.
2961 bool usesFirstLaneOnly(const VPValue *Op) const override {
2963 "Op must be an operand of the recipe");
2964 return Op == getEVL();
2965 }
2966
2967protected:
2968#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2969 /// Print the recipe.
2970 void printRecipe(raw_ostream &O, const Twine &Indent,
2971 VPSlotTracker &SlotTracker) const override;
2972#endif
2973};
2974
2975/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2976/// copies of the original scalar type, one per lane, instead of producing a
2977/// single copy of widened type for all lanes. If the instruction is known to be
2978/// a single scalar, only one copy, per lane zero, will be generated.
2980 public VPIRMetadata {
2981 /// Indicator if only a single replica per lane is needed.
2982 bool IsSingleScalar;
2983
2984 /// Indicator if the replicas are also predicated.
2985 bool IsPredicated;
2986
2987public:
2989 bool IsSingleScalar, VPValue *Mask = nullptr,
2990 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2991 DebugLoc DL = DebugLoc::getUnknown())
2992 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2993 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2994 IsPredicated(Mask) {
2995 setUnderlyingValue(I);
2996 if (Mask)
2997 addOperand(Mask);
2998 }
2999
3000 ~VPReplicateRecipe() override = default;
3001
3003 auto *Copy = new VPReplicateRecipe(
3004 getUnderlyingInstr(), operands(), IsSingleScalar,
3005 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3006 Copy->transferFlags(*this);
3007 return Copy;
3008 }
3009
3010 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
3011
3012 /// Generate replicas of the desired Ingredient. Replicas will be generated
3013 /// for all parts and lanes unless a specific part and lane are specified in
3014 /// the \p State.
3015 void execute(VPTransformState &State) override;
3016
3017 /// Return the cost of this VPReplicateRecipe.
3018 InstructionCost computeCost(ElementCount VF,
3019 VPCostContext &Ctx) const override;
3020
3021 bool isSingleScalar() const { return IsSingleScalar; }
3022
3023 bool isPredicated() const { return IsPredicated; }
3024
3025 /// Returns true if the recipe only uses the first lane of operand \p Op.
3026 bool usesFirstLaneOnly(const VPValue *Op) const override {
3028 "Op must be an operand of the recipe");
3029 return isSingleScalar();
3030 }
3031
3032 /// Returns true if the recipe uses scalars of operand \p Op.
3033 bool usesScalars(const VPValue *Op) const override {
3035 "Op must be an operand of the recipe");
3036 return true;
3037 }
3038
3039 /// Returns true if the recipe is used by a widened recipe via an intervening
3040 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3041 /// in a vector.
3042 bool shouldPack() const;
3043
3044 /// Return the mask of a predicated VPReplicateRecipe.
3046 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3047 return getOperand(getNumOperands() - 1);
3048 }
3049
3050 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3051
3052protected:
3053#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3054 /// Print the recipe.
3055 void printRecipe(raw_ostream &O, const Twine &Indent,
3056 VPSlotTracker &SlotTracker) const override;
3057#endif
3058};
3059
3060/// A recipe for generating conditional branches on the bits of a mask.
3062public:
3064 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3065
3068 }
3069
3070 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3071
3072 /// Generate the extraction of the appropriate bit from the block mask and the
3073 /// conditional branch.
3074 void execute(VPTransformState &State) override;
3075
3076 /// Return the cost of this VPBranchOnMaskRecipe.
3077 InstructionCost computeCost(ElementCount VF,
3078 VPCostContext &Ctx) const override;
3079
3080#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3081 /// Print the recipe.
3082 void printRecipe(raw_ostream &O, const Twine &Indent,
3083 VPSlotTracker &SlotTracker) const override {
3084 O << Indent << "BRANCH-ON-MASK ";
3086 }
3087#endif
3088
3089 /// Returns true if the recipe uses scalars of operand \p Op.
3090 bool usesScalars(const VPValue *Op) const override {
3092 "Op must be an operand of the recipe");
3093 return true;
3094 }
3095};
3096
3097/// A recipe to combine multiple recipes into a single 'expression' recipe,
3098/// which should be considered a single entity for cost-modeling and transforms.
3099/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3100/// expression recipes, before execute. The individual expression recipes are
3101/// completely disconnected from the def-use graph of other recipes not part of
3102/// the expression. Def-use edges between pairs of expression recipes remain
3103/// intact, whereas every edge between an expression recipe and a recipe outside
3104/// the expression is elevated to connect the non-expression recipe with the
3105/// VPExpressionRecipe itself.
3106class VPExpressionRecipe : public VPSingleDefRecipe {
3107 /// Recipes included in this VPExpressionRecipe. This could contain
3108 /// duplicates.
3109 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3110
3111 /// Temporary VPValues used for external operands of the expression, i.e.
3112 /// operands not defined by recipes in the expression.
3113 SmallVector<VPValue *> LiveInPlaceholders;
3114
3115 enum class ExpressionTypes {
3116 /// Represents an inloop extended reduction operation, performing a
3117 /// reduction on an extended vector operand into a scalar value, and adding
3118 /// the result to a chain.
3119 ExtendedReduction,
3120 /// Represent an inloop multiply-accumulate reduction, multiplying the
3121 /// extended vector operands, performing a reduction.add on the result, and
3122 /// adding the scalar result to a chain.
3123 ExtMulAccReduction,
3124 /// Represent an inloop multiply-accumulate reduction, multiplying the
3125 /// vector operands, performing a reduction.add on the result, and adding
3126 /// the scalar result to a chain.
3127 MulAccReduction,
3128 /// Represent an inloop multiply-accumulate reduction, multiplying the
3129 /// extended vector operands, negating the multiplication, performing a
3130 /// reduction.add on the result, and adding the scalar result to a chain.
3131 ExtNegatedMulAccReduction,
3132 };
3133
3134 /// Type of the expression.
3135 ExpressionTypes ExpressionType;
3136
3137 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3138 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3139 /// in the expression) are replaced by temporary VPValues and the original
3140 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3141 /// as needed (excluding last) to ensure they are only used by other recipes
3142 /// in the expression.
3143 VPExpressionRecipe(ExpressionTypes ExpressionType,
3144 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3145
3146public:
3148 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3150 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3153 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3154 {Ext0, Ext1, Mul, Red}) {}
3157 VPReductionRecipe *Red)
3158 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3159 {Ext0, Ext1, Mul, Sub, Red}) {
3160 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3161 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3162 "Expected an add reduction");
3163 assert(getNumOperands() >= 3 && "Expected at least three operands");
3164 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3165 assert(SubConst && SubConst->isZero() &&
3166 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3167 }
3168
3170 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3171 for (auto *R : reverse(ExpressionRecipes)) {
3172 if (ExpressionRecipesSeen.insert(R).second)
3173 delete R;
3174 }
3175 for (VPValue *T : LiveInPlaceholders)
3176 delete T;
3177 }
3178
3179 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3180
3181 VPExpressionRecipe *clone() override {
3182 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3183 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3184 for (auto *R : ExpressionRecipes)
3185 NewExpressiondRecipes.push_back(R->clone());
3186 for (auto *New : NewExpressiondRecipes) {
3187 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3188 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3189 // Update placeholder operands in the cloned recipe to use the external
3190 // operands, to be internalized when the cloned expression is constructed.
3191 for (const auto &[Placeholder, OutsideOp] :
3192 zip(LiveInPlaceholders, operands()))
3193 New->replaceUsesOfWith(Placeholder, OutsideOp);
3194 }
3195 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3196 }
3197
3198 /// Return the VPValue to use to infer the result type of the recipe.
3200 unsigned OpIdx =
3201 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3202 : 1;
3203 return getOperand(getNumOperands() - OpIdx);
3204 }
3205
3206 /// Insert the recipes of the expression back into the VPlan, directly before
3207 /// the current recipe. Leaves the expression recipe empty, which must be
3208 /// removed before codegen.
3209 void decompose();
3210
3211 unsigned getVFScaleFactor() const {
3212 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3213 return PR ? PR->getVFScaleFactor() : 1;
3214 }
3215
3216 /// Method for generating code, must not be called as this recipe is abstract.
3217 void execute(VPTransformState &State) override {
3218 llvm_unreachable("recipe must be removed before execute");
3219 }
3220
3222 VPCostContext &Ctx) const override;
3223
3224 /// Returns true if this expression contains recipes that may read from or
3225 /// write to memory.
3226 bool mayReadOrWriteMemory() const;
3227
3228 /// Returns true if this expression contains recipes that may have side
3229 /// effects.
3230 bool mayHaveSideEffects() const;
3231
3232 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3233 bool isSingleScalar() const;
3234
3235protected:
3236#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3237 /// Print the recipe.
3238 void printRecipe(raw_ostream &O, const Twine &Indent,
3239 VPSlotTracker &SlotTracker) const override;
3240#endif
3241};
3242
3243/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3244/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3245/// order to merge values that are set under such a branch and feed their uses.
3246/// The phi nodes can be scalar or vector depending on the users of the value.
3247/// This recipe works in concert with VPBranchOnMaskRecipe.
3249public:
3250 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3251 /// nodes after merging back from a Branch-on-Mask.
3253 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3254 ~VPPredInstPHIRecipe() override = default;
3255
3257 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3258 }
3259
3260 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3261
3262 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3263 /// retain SSA form.
3264 void execute(VPTransformState &State) override;
3265
3266 /// Return the cost of this VPPredInstPHIRecipe.
3268 VPCostContext &Ctx) const override {
3269 // TODO: Compute accurate cost after retiring the legacy cost model.
3270 return 0;
3271 }
3272
3273 /// Returns true if the recipe uses scalars of operand \p Op.
3274 bool usesScalars(const VPValue *Op) const override {
3276 "Op must be an operand of the recipe");
3277 return true;
3278 }
3279
3280protected:
3281#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3282 /// Print the recipe.
3283 void printRecipe(raw_ostream &O, const Twine &Indent,
3284 VPSlotTracker &SlotTracker) const override;
3285#endif
3286};
3287
3288/// A common base class for widening memory operations. An optional mask can be
3289/// provided as the last operand.
3291 public VPIRMetadata {
3292protected:
3294
3295 /// Alignment information for this memory access.
3297
3298 /// Whether the accessed addresses are consecutive.
3300
3301 /// Whether the consecutive accessed addresses are in reverse order.
3303
3304 /// Whether the memory access is masked.
3305 bool IsMasked = false;
3306
3307 void setMask(VPValue *Mask) {
3308 assert(!IsMasked && "cannot re-set mask");
3309 if (!Mask)
3310 return;
3311 addOperand(Mask);
3312 IsMasked = true;
3313 }
3314
3315 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3316 std::initializer_list<VPValue *> Operands,
3317 bool Consecutive, bool Reverse,
3318 const VPIRMetadata &Metadata, DebugLoc DL)
3319 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3321 Reverse(Reverse) {
3322 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3324 "Reversed acccess without VPVectorEndPointerRecipe address?");
3325 }
3326
3327public:
3329 llvm_unreachable("cloning not supported");
3330 }
3331
3332 static inline bool classof(const VPRecipeBase *R) {
3333 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3334 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3335 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3336 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3337 }
3338
3339 static inline bool classof(const VPUser *U) {
3340 auto *R = dyn_cast<VPRecipeBase>(U);
3341 return R && classof(R);
3342 }
3343
3344 /// Return whether the loaded-from / stored-to addresses are consecutive.
3345 bool isConsecutive() const { return Consecutive; }
3346
3347 /// Return whether the consecutive loaded/stored addresses are in reverse
3348 /// order.
3349 bool isReverse() const { return Reverse; }
3350
3351 /// Return the address accessed by this recipe.
3352 VPValue *getAddr() const { return getOperand(0); }
3353
3354 /// Returns true if the recipe is masked.
3355 bool isMasked() const { return IsMasked; }
3356
3357 /// Return the mask used by this recipe. Note that a full mask is represented
3358 /// by a nullptr.
3359 VPValue *getMask() const {
3360 // Mask is optional and therefore the last operand.
3361 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3362 }
3363
3364 /// Returns the alignment of the memory access.
3365 Align getAlign() const { return Alignment; }
3366
3367 /// Generate the wide load/store.
3368 void execute(VPTransformState &State) override {
3369 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3370 }
3371
3372 /// Return the cost of this VPWidenMemoryRecipe.
3373 InstructionCost computeCost(ElementCount VF,
3374 VPCostContext &Ctx) const override;
3375
3377};
3378
3379/// A recipe for widening load operations, using the address to load from and an
3380/// optional mask.
3382 public VPRecipeValue {
3384 bool Consecutive, bool Reverse,
3385 const VPIRMetadata &Metadata, DebugLoc DL)
3386 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3387 Reverse, Metadata, DL),
3388 VPRecipeValue(this, &Load) {
3389 setMask(Mask);
3390 }
3391
3394 getMask(), Consecutive, Reverse, *this,
3395 getDebugLoc());
3396 }
3397
3398 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3399
3400 /// Generate a wide load or gather.
3401 void execute(VPTransformState &State) override;
3402
3403 /// Returns true if the recipe only uses the first lane of operand \p Op.
3404 bool usesFirstLaneOnly(const VPValue *Op) const override {
3406 "Op must be an operand of the recipe");
3407 // Widened, consecutive loads operations only demand the first lane of
3408 // their address.
3409 return Op == getAddr() && isConsecutive();
3410 }
3411
3412protected:
3413#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3414 /// Print the recipe.
3415 void printRecipe(raw_ostream &O, const Twine &Indent,
3416 VPSlotTracker &SlotTracker) const override;
3417#endif
3418};
3419
3420/// A recipe for widening load operations with vector-predication intrinsics,
3421/// using the address to load from, the explicit vector length and an optional
3422/// mask.
3424 public VPRecipeValue {
3426 VPValue *Mask)
3427 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3428 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3429 L.getDebugLoc()),
3430 VPRecipeValue(this, &getIngredient()) {
3431 setMask(Mask);
3432 }
3433
3434 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3435
3436 /// Return the EVL operand.
3437 VPValue *getEVL() const { return getOperand(1); }
3438
3439 /// Generate the wide load or gather.
3440 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3441
3442 /// Return the cost of this VPWidenLoadEVLRecipe.
3444 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3445
3446 /// Returns true if the recipe only uses the first lane of operand \p Op.
3447 bool usesFirstLaneOnly(const VPValue *Op) const override {
3449 "Op must be an operand of the recipe");
3450 // Widened loads only demand the first lane of EVL and consecutive loads
3451 // only demand the first lane of their address.
3452 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3453 }
3454
3455protected:
3456#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3457 /// Print the recipe.
3458 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3459 VPSlotTracker &SlotTracker) const override;
3460#endif
3461};
3462
3463/// A recipe for widening store operations, using the stored value, the address
3464/// to store to and an optional mask.
3466 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3467 VPValue *Mask, bool Consecutive, bool Reverse,
3468 const VPIRMetadata &Metadata, DebugLoc DL)
3469 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3470 Consecutive, Reverse, Metadata, DL) {
3471 setMask(Mask);
3472 }
3473
3479
3480 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3481
3482 /// Return the value stored by this recipe.
3483 VPValue *getStoredValue() const { return getOperand(1); }
3484
3485 /// Generate a wide store or scatter.
3486 void execute(VPTransformState &State) override;
3487
3488 /// Returns true if the recipe only uses the first lane of operand \p Op.
3489 bool usesFirstLaneOnly(const VPValue *Op) const override {
3491 "Op must be an operand of the recipe");
3492 // Widened, consecutive stores only demand the first lane of their address,
3493 // unless the same operand is also stored.
3494 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3495 }
3496
3497protected:
3498#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3499 /// Print the recipe.
3500 void printRecipe(raw_ostream &O, const Twine &Indent,
3501 VPSlotTracker &SlotTracker) const override;
3502#endif
3503};
3504
3505/// A recipe for widening store operations with vector-predication intrinsics,
3506/// using the value to store, the address to store to, the explicit vector
3507/// length and an optional mask.
3510 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3511 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3512 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3513 S.isReverse(), S, S.getDebugLoc()) {
3514 setMask(Mask);
3515 }
3516
3517 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3518
3519 /// Return the value stored by this recipe.
3520 VPValue *getStoredValue() const { return getOperand(1); }
3521
3522 /// Return the EVL operand.
3523 VPValue *getEVL() const { return getOperand(2); }
3524
3525 /// Generate the wide store or scatter.
3526 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3527
3528 /// Return the cost of this VPWidenStoreEVLRecipe.
3530 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3531
3532 /// Returns true if the recipe only uses the first lane of operand \p Op.
3533 bool usesFirstLaneOnly(const VPValue *Op) const override {
3535 "Op must be an operand of the recipe");
3536 if (Op == getEVL()) {
3537 assert(getStoredValue() != Op && "unexpected store of EVL");
3538 return true;
3539 }
3540 // Widened, consecutive memory operations only demand the first lane of
3541 // their address, unless the same operand is also stored. The latter can
3542 // happen with opaque pointers.
3543 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3544 }
3545
3546protected:
3547#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3548 /// Print the recipe.
3549 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3550 VPSlotTracker &SlotTracker) const override;
3551#endif
3552};
3553
3554/// Recipe to expand a SCEV expression.
3556 const SCEV *Expr;
3557
3558public:
3560 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3561
3562 ~VPExpandSCEVRecipe() override = default;
3563
3564 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3565
3566 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3567
3568 void execute(VPTransformState &State) override {
3569 llvm_unreachable("SCEV expressions must be expanded before final execute");
3570 }
3571
3572 /// Return the cost of this VPExpandSCEVRecipe.
3574 VPCostContext &Ctx) const override {
3575 // TODO: Compute accurate cost after retiring the legacy cost model.
3576 return 0;
3577 }
3578
3579 const SCEV *getSCEV() const { return Expr; }
3580
3581protected:
3582#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3583 /// Print the recipe.
3584 void printRecipe(raw_ostream &O, const Twine &Indent,
3585 VPSlotTracker &SlotTracker) const override;
3586#endif
3587};
3588
3589 /// Canonical scalar induction phi of the vector loop, starting at the specified
3590 /// start value (either 0 or the resume value when vectorizing the epilogue
3591 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3592 /// canonical induction variable.
3594public:
3596 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3597
3598 ~VPCanonicalIVPHIRecipe() override = default;
3599
3602 R->addOperand(getBackedgeValue());
3603 return R;
3604 }
3605
3606 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3607
3608 void execute(VPTransformState &State) override {
3609 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3610 "scalar phi recipe");
3611 }
3612
3613 /// Returns the start value of the canonical induction.
3615
3616 /// Returns the scalar type of the induction.
3617 Type *getScalarType() const { return getStartValue()->getType(); }
3618
3619 /// Returns true if the recipe only uses the first lane of operand \p Op.
3620 bool usesFirstLaneOnly(const VPValue *Op) const override {
3622 "Op must be an operand of the recipe");
3623 return true;
3624 }
3625
3626 /// Returns true if the recipe only uses the first part of operand \p Op.
3627 bool usesFirstPartOnly(const VPValue *Op) const override {
3629 "Op must be an operand of the recipe");
3630 return true;
3631 }
3632
3633 /// Return the cost of this VPCanonicalIVPHIRecipe.
3635 VPCostContext &Ctx) const override {
3636 // For now, match the behavior of the legacy cost model.
3637 return 0;
3638 }
3639
3640protected:
3641#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3642 /// Print the recipe.
3643 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3644 VPSlotTracker &SlotTracker) const override;
3645#endif
3646};
3647
3648/// A recipe for generating the active lane mask for the vector loop that is
3649/// used to predicate the vector operations.
3651public:
3653 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3654 DL) {}
3655
3656 ~VPActiveLaneMaskPHIRecipe() override = default;
3657
3660 if (getNumOperands() == 2)
3661 R->addOperand(getOperand(1));
3662 return R;
3663 }
3664
3665 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3666
3667 /// Generate the active lane mask phi of the vector loop.
3668 void execute(VPTransformState &State) override;
3669
3670protected:
3671#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3672 /// Print the recipe.
3673 void printRecipe(raw_ostream &O, const Twine &Indent,
3674 VPSlotTracker &SlotTracker) const override;
3675#endif
3676};
3677
3678/// A recipe for generating the phi node for the current index of elements,
3679/// adjusted in accordance with EVL value. It starts at the start value of the
3680/// canonical induction and gets incremented by EVL in each iteration of the
3681/// vector loop.
3683public:
3685 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3686
3687 ~VPEVLBasedIVPHIRecipe() override = default;
3688
3690 llvm_unreachable("cloning not implemented yet");
3691 }
3692
3693 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3694
3695 void execute(VPTransformState &State) override {
3696 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3697 "scalar phi recipe");
3698 }
3699
3700 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3702 VPCostContext &Ctx) const override {
3703 // For now, match the behavior of the legacy cost model.
3704 return 0;
3705 }
3706
3707 /// Returns true if the recipe only uses the first lane of operand \p Op.
3708 bool usesFirstLaneOnly(const VPValue *Op) const override {
3710 "Op must be an operand of the recipe");
3711 return true;
3712 }
3713
3714protected:
3715#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3716 /// Print the recipe.
3717 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3718 VPSlotTracker &SlotTracker) const override;
3719#endif
3720};
3721
3722/// A Recipe for widening the canonical induction variable of the vector loop.
3724 public VPUnrollPartAccessor<1> {
3725public:
3727 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3728
3729 ~VPWidenCanonicalIVRecipe() override = default;
3730
3735
3736 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3737
3738 /// Generate a canonical vector induction variable of the vector loop, with
3739 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3740 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3741 void execute(VPTransformState &State) override;
3742
3743 /// Return the cost of this VPWidenCanonicalIVRecipe.
3745 VPCostContext &Ctx) const override {
3746 // TODO: Compute accurate cost after retiring the legacy cost model.
3747 return 0;
3748 }
3749
3750protected:
3751#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3752 /// Print the recipe.
3753 void printRecipe(raw_ostream &O, const Twine &Indent,
3754 VPSlotTracker &SlotTracker) const override;
3755#endif
3756};
3757
3758 /// A recipe for converting the input value \p IV to the corresponding
3759 /// value of an IV with different start and step values, using Start + IV *
3760 /// Step.
3762 /// Kind of the induction.
3764 /// If not nullptr, the floating point induction binary operator. Must be set
3765 /// for floating point inductions.
3766 const FPMathOperator *FPBinOp;
3767
3768 /// Name to use for the generated IR instruction for the derived IV.
3769 std::string Name;
3770
3771public:
3773 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3774 const Twine &Name = "")
3776 IndDesc.getKind(),
3777 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3778 Start, CanonicalIV, Step, Name) {}
3779
3781 const FPMathOperator *FPBinOp, VPIRValue *Start,
3782 VPValue *IV, VPValue *Step, const Twine &Name = "")
3783 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3784 FPBinOp(FPBinOp), Name(Name.str()) {}
3785
3786 ~VPDerivedIVRecipe() override = default;
3787
3789 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3790 getStepValue());
3791 }
3792
3793 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3794
3795 /// Generate the transformed value of the induction at offset StartValue (1.
3796 /// operand) + IV (2. operand) * StepValue (3. operand).
3797 void execute(VPTransformState &State) override;
3798
3799 /// Return the cost of this VPDerivedIVRecipe.
3801 VPCostContext &Ctx) const override {
3802 // TODO: Compute accurate cost after retiring the legacy cost model.
3803 return 0;
3804 }
3805
3806 Type *getScalarType() const { return getStartValue()->getType(); }
3807
3809 VPValue *getStepValue() const { return getOperand(2); }
3810
3811 /// Returns true if the recipe only uses the first lane of operand \p Op.
3812 bool usesFirstLaneOnly(const VPValue *Op) const override {
3814 "Op must be an operand of the recipe");
3815 return true;
3816 }
3817
3818protected:
3819#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3820 /// Print the recipe.
3821 void printRecipe(raw_ostream &O, const Twine &Indent,
3822 VPSlotTracker &SlotTracker) const override;
3823#endif
3824};
3825
3826/// A recipe for handling phi nodes of integer and floating-point inductions,
3827/// producing their scalar values. Before unrolling by UF the recipe represents
3828/// the VF*UF scalar values to be produced, or UF scalar values if only first
3829/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
3830/// operand StartIndex to all unroll parts except part 0, as the recipe
3831/// represents the VF scalar values (this number of values is taken from
3832/// State.VF rather than from the VF operand) starting at IV + StartIndex.
3834 Instruction::BinaryOps InductionOpcode;
3835
3836public:
3839 DebugLoc DL)
3840 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3841 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3842 InductionOpcode(Opcode) {}
3843
3845 VPValue *Step, VPValue *VF,
3848 IV, Step, VF, IndDesc.getInductionOpcode(),
3849 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3850 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3851 : FastMathFlags(),
3852 DL) {}
3853
3854 ~VPScalarIVStepsRecipe() override = default;
3855
3857 return new VPScalarIVStepsRecipe(
3858 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3860 getDebugLoc());
3861 }
3862
3863 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3864
3865 /// Generate the scalarized versions of the phi node as needed by their users.
3866 void execute(VPTransformState &State) override;
3867
3868 /// Return the cost of this VPScalarIVStepsRecipe.
3870 VPCostContext &Ctx) const override {
3871 // TODO: Compute accurate cost after retiring the legacy cost model.
3872 return 0;
3873 }
3874
3875 VPValue *getStepValue() const { return getOperand(1); }
3876
3877 /// Return the number of scalars to produce per unroll part, used to compute
3878 /// StartIndex during unrolling.
3879 VPValue *getVFValue() const { return getOperand(2); }
3880
3881 /// Return the StartIndex, or null if known to be zero, valid only after
3882 /// unrolling.
3884 return getNumOperands() == 4 ? getOperand(3) : nullptr;
3885 }
3886
3887 /// Returns true if the recipe only uses the first lane of operand \p Op.
3888 bool usesFirstLaneOnly(const VPValue *Op) const override {
3890 "Op must be an operand of the recipe");
3891 return true;
3892 }
3893
3894protected:
3895#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3896 /// Print the recipe.
3897 void printRecipe(raw_ostream &O, const Twine &Indent,
3898 VPSlotTracker &SlotTracker) const override;
3899#endif
3900};
3901
3902/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3903/// types implementing VPPhiAccessors. Used by isa<> & co.
3905 static inline bool isPossible(const VPRecipeBase *f) {
3906 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3908 }
3909};
3910/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3911/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3912template <typename SrcTy>
3913struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3914
3916
3917 /// doCast is used by cast<>.
3918 static inline VPPhiAccessors *doCast(SrcTy R) {
3919 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3920 switch (R->getVPDefID()) {
3921 case VPDef::VPInstructionSC:
3922 return cast<VPPhi>(R);
3923 case VPDef::VPIRInstructionSC:
3924 return cast<VPIRPhi>(R);
3925 case VPDef::VPWidenPHISC:
3926 return cast<VPWidenPHIRecipe>(R);
3927 default:
3928 return cast<VPHeaderPHIRecipe>(R);
3929 }
3930 }());
3931 }
3932
3933 /// doCastIfPossible is used by dyn_cast<>.
3934 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3935 if (!Self::isPossible(f))
3936 return nullptr;
3937 return doCast(f);
3938 }
3939};
3940template <>
3943template <>
3946
3947/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3948/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3949namespace detail {
3950template <typename DstTy, typename RecipeBasePtrTy>
3951static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3952 switch (R->getVPDefID()) {
3953 case VPDef::VPInstructionSC:
3954 return cast<VPInstruction>(R);
3955 case VPDef::VPWidenSC:
3956 return cast<VPWidenRecipe>(R);
3957 case VPDef::VPWidenCastSC:
3958 return cast<VPWidenCastRecipe>(R);
3959 case VPDef::VPWidenIntrinsicSC:
3961 case VPDef::VPWidenCallSC:
3962 return cast<VPWidenCallRecipe>(R);
3963 case VPDef::VPReplicateSC:
3964 return cast<VPReplicateRecipe>(R);
3965 case VPDef::VPInterleaveSC:
3966 case VPDef::VPInterleaveEVLSC:
3967 return cast<VPInterleaveBase>(R);
3968 case VPDef::VPWidenLoadSC:
3969 case VPDef::VPWidenLoadEVLSC:
3970 case VPDef::VPWidenStoreSC:
3971 case VPDef::VPWidenStoreEVLSC:
3972 return cast<VPWidenMemoryRecipe>(R);
3973 default:
3974 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3975 }
3976}
3977} // namespace detail
3978
3979/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3980/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3981template <typename DstTy, typename SrcTy>
3982struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3983 static inline bool isPossible(SrcTy R) {
3984 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3985 // also handled in castToVPIRMetadata.
3990 R);
3991 }
3992
3993 using RetTy = DstTy *;
3994
3995 /// doCast is used by cast<>.
3996 static inline RetTy doCast(SrcTy R) {
3998 }
3999
4000 /// doCastIfPossible is used by dyn_cast<>.
4001 static inline RetTy doCastIfPossible(SrcTy R) {
4002 if (!isPossible(R))
4003 return nullptr;
4004 return doCast(R);
4005 }
4006};
4007template <>
4010template <>
4013
4014/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4015/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4016/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4017class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4018 friend class VPlan;
4019
4020 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4021 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4022 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4023 if (Recipe)
4024 appendRecipe(Recipe);
4025 }
4026
4027public:
4029
4030protected:
4031 /// The VPRecipes held in the order of output instructions to generate.
4033
4034 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4035 : VPBlockBase(BlockSC, Name.str()) {}
4036
4037public:
4038 ~VPBasicBlock() override {
4039 while (!Recipes.empty())
4040 Recipes.pop_back();
4041 }
4042
4043 /// Instruction iterators...
4048
4049 //===--------------------------------------------------------------------===//
4050 /// Recipe iterator methods
4051 ///
4052 inline iterator begin() { return Recipes.begin(); }
4053 inline const_iterator begin() const { return Recipes.begin(); }
4054 inline iterator end() { return Recipes.end(); }
4055 inline const_iterator end() const { return Recipes.end(); }
4056
4057 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4058 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4059 inline reverse_iterator rend() { return Recipes.rend(); }
4060 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4061
4062 inline size_t size() const { return Recipes.size(); }
4063 inline bool empty() const { return Recipes.empty(); }
4064 inline const VPRecipeBase &front() const { return Recipes.front(); }
4065 inline VPRecipeBase &front() { return Recipes.front(); }
4066 inline const VPRecipeBase &back() const { return Recipes.back(); }
4067 inline VPRecipeBase &back() { return Recipes.back(); }
4068
4069 /// Returns a reference to the list of recipes.
4071
4072 /// Returns a pointer to a member of the recipe list.
4073 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4074 return &VPBasicBlock::Recipes;
4075 }
4076
4077 /// Method to support type inquiry through isa, cast, and dyn_cast.
4078 static inline bool classof(const VPBlockBase *V) {
4079 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4080 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4081 }
4082
4083 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4084 assert(Recipe && "No recipe to append.");
4085 assert(!Recipe->Parent && "Recipe already in VPlan");
4086 Recipe->Parent = this;
4087 Recipes.insert(InsertPt, Recipe);
4088 }
4089
4090 /// Augment the existing recipes of a VPBasicBlock with an additional
4091 /// \p Recipe as the last recipe.
4092 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4093
4094 /// The method which generates the output IR instructions that correspond to
4095 /// this VPBasicBlock, thereby "executing" the VPlan.
4096 void execute(VPTransformState *State) override;
4097
4098 /// Return the cost of this VPBasicBlock.
4099 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4100
4101 /// Return the position of the first non-phi node recipe in the block.
4102 iterator getFirstNonPhi();
4103
4104 /// Returns an iterator range over the PHI-like recipes in the block.
4108
4109 /// Split current block at \p SplitAt by inserting a new block between the
4110 /// current block and its successors and moving all recipes starting at
4111 /// SplitAt to the new block. Returns the new block.
4112 VPBasicBlock *splitAt(iterator SplitAt);
4113
4114 VPRegionBlock *getEnclosingLoopRegion();
4115 const VPRegionBlock *getEnclosingLoopRegion() const;
4116
4117#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4118 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4119 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4120 ///
4121 /// Note that the numbering is applied to the whole VPlan, so printing
4122 /// individual blocks is consistent with the whole VPlan printing.
4123 void print(raw_ostream &O, const Twine &Indent,
4124 VPSlotTracker &SlotTracker) const override;
4125 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4126#endif
4127
4128 /// If the block has multiple successors, return the branch recipe terminating
4129 /// the block. If there is no or only a single successor, return nullptr.
4130 VPRecipeBase *getTerminator();
4131 const VPRecipeBase *getTerminator() const;
4132
4133 /// Returns true if the block is exiting its parent region.
4134 bool isExiting() const;
4135
4136 /// Clone the current block and its recipes, without updating the operands of
4137 /// the cloned recipes.
4138 VPBasicBlock *clone() override;
4139
4140 /// Returns the predecessor block at index \p Idx with the predecessors as per
4141 /// the corresponding plain CFG. If the block is an entry block to a region,
4142 /// the first predecessor is the single predecessor of a region, and the
4143 /// second predecessor is the exiting block of the region.
4144 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4145
4146protected:
4147 /// Execute the recipes in the IR basic block \p BB.
4148 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4149
4150 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4151 /// generated for this VPBB.
4152 void connectToPredecessors(VPTransformState &State);
4153
4154private:
4155 /// Create an IR BasicBlock to hold the output instructions generated by this
4156 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4157 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4158};
4159
4160inline const VPBasicBlock *
4162 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4163}
4164
4165/// A special type of VPBasicBlock that wraps an existing IR basic block.
4166/// Recipes of the block get added before the first non-phi instruction in the
4167/// wrapped block.
4168/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4169/// preheader block.
4170class VPIRBasicBlock : public VPBasicBlock {
4171 friend class VPlan;
4172
4173 BasicBlock *IRBB;
4174
4175 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4176 VPIRBasicBlock(BasicBlock *IRBB)
4177 : VPBasicBlock(VPIRBasicBlockSC,
4178 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4179 IRBB(IRBB) {}
4180
4181public:
4182 ~VPIRBasicBlock() override = default;
4183
4184 static inline bool classof(const VPBlockBase *V) {
4185 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4186 }
4187
4188 /// The method which generates the output IR instructions that correspond to
4189 /// this VPBasicBlock, thereby "executing" the VPlan.
4190 void execute(VPTransformState *State) override;
4191
4192 VPIRBasicBlock *clone() override;
4193
4194 BasicBlock *getIRBasicBlock() const { return IRBB; }
4195};
4196
4197/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4198/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4199/// A VPRegionBlock may indicate that its contents are to be replicated several
4200/// times. This is designed to support predicated scalarization, in which a
4201/// scalar if-then code structure needs to be generated VF * UF times. Having
4202/// this replication indicator helps to keep a single model for multiple
4203/// candidate VF's. The actual replication takes place only once the desired VF
4204/// and UF have been determined.
4205class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4206 friend class VPlan;
4207
4208 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4209 VPBlockBase *Entry;
4210
4211 /// Hold the Single Exiting block of the SESE region modelled by the
4212 /// VPRegionBlock.
4213 VPBlockBase *Exiting;
4214
4215 /// An indicator whether this region is to generate multiple replicated
4216 /// instances of output IR corresponding to its VPBlockBases.
4217 bool IsReplicator;
4218
4219 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4220 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4221 const std::string &Name = "", bool IsReplicator = false)
4222 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4223 IsReplicator(IsReplicator) {
4224 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4225 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4226 Entry->setParent(this);
4227 Exiting->setParent(this);
4228 }
4229 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4230 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4231 IsReplicator(IsReplicator) {}
4232
4233public:
4234 ~VPRegionBlock() override = default;
4235
4236 /// Method to support type inquiry through isa, cast, and dyn_cast.
4237 static inline bool classof(const VPBlockBase *V) {
4238 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4239 }
4240
4241 const VPBlockBase *getEntry() const { return Entry; }
4242 VPBlockBase *getEntry() { return Entry; }
4243
4244 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4245 /// EntryBlock must have no predecessors.
4246 void setEntry(VPBlockBase *EntryBlock) {
4247 assert(EntryBlock->getPredecessors().empty() &&
4248 "Entry block cannot have predecessors.");
4249 Entry = EntryBlock;
4250 EntryBlock->setParent(this);
4251 }
4252
4253 const VPBlockBase *getExiting() const { return Exiting; }
4254 VPBlockBase *getExiting() { return Exiting; }
4255
4256 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4257 /// ExitingBlock must have no successors.
4258 void setExiting(VPBlockBase *ExitingBlock) {
4259 assert(ExitingBlock->getSuccessors().empty() &&
4260 "Exit block cannot have successors.");
4261 Exiting = ExitingBlock;
4262 ExitingBlock->setParent(this);
4263 }
4264
4265 /// Returns the pre-header VPBasicBlock of the loop region.
4267 assert(!isReplicator() && "should only get pre-header of loop regions");
4268 return getSinglePredecessor()->getExitingBasicBlock();
4269 }
4270
4271 /// An indicator whether this region is to generate multiple replicated
4272 /// instances of output IR corresponding to its VPBlockBases.
4273 bool isReplicator() const { return IsReplicator; }
4274
4275 /// The method which generates the output IR instructions that correspond to
4276 /// this VPRegionBlock, thereby "executing" the VPlan.
4277 void execute(VPTransformState *State) override;
4278
4279 /// Return the cost of this region.
4280 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4281
4282#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4283 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4284 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4285 /// consecutive numbers.
4286 ///
4287 /// Note that the numbering is applied to the whole VPlan, so printing
4288 /// individual regions is consistent with the whole VPlan printing.
4289 void print(raw_ostream &O, const Twine &Indent,
4290 VPSlotTracker &SlotTracker) const override;
4291 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4292#endif
4293
4294 /// Clone all blocks in the single-entry single-exit region of the block and
4295 /// their recipes without updating the operands of the cloned recipes.
4296 VPRegionBlock *clone() override;
4297
4298 /// Remove the current region from its VPlan, connecting its predecessor to
4299 /// its entry, and its exiting block to its successor.
4300 void dissolveToCFGLoop();
4301
4302 /// Returns the canonical induction recipe of the region.
4304 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4305 if (EntryVPBB->empty()) {
4306 // VPlan native path. TODO: Unify both code paths.
4307 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4308 }
4309 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4310 }
4312 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4313 }
4314
4315 /// Return the type of the canonical IV for loop regions.
4316 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4317 const Type *getCanonicalIVType() const {
4318 return getCanonicalIV()->getScalarType();
4319 }
4320};
4321
4323 return getParent()->getParent();
4324}
4325
4327 return getParent()->getParent();
4328}
4329
4330 /// VPlan models a candidate for vectorization, encoding various decisions
4331 /// taken to produce efficient output IR, including which branches,
4332 /// basic-blocks and output IR instructions to generate, and their cost. VPlan
4333 /// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
4334 /// Entry VPBasicBlock.
4335class VPlan {
4336 friend class VPlanPrinter;
4337 friend class VPSlotTracker;
4338
4339 /// VPBasicBlock corresponding to the original preheader. Used to place
4340 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4341 /// rest of VPlan execution.
4342 /// When this VPlan is used for the epilogue vector loop, the entry will be
4343 /// replaced by a new entry block created during skeleton creation.
4344 VPBasicBlock *Entry;
4345
4346 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4347 VPIRBasicBlock *ScalarHeader;
4348
4349 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4350 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4351 /// e.g. if the scalar epilogue always executes.
4353
4354 /// Holds the VFs applicable to this VPlan.
4356
4357 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4358 /// any UF.
4360
4361 /// Holds the name of the VPlan, for printing.
4362 std::string Name;
4363
4364 /// Represents the trip count of the original loop, for folding
4365 /// the tail.
4366 VPValue *TripCount = nullptr;
4367
4368 /// Represents the backedge taken count of the original loop, for folding
4369 /// the tail. It equals TripCount - 1.
4370 VPSymbolicValue *BackedgeTakenCount = nullptr;
4371
4372 /// Represents the vector trip count.
4373 VPSymbolicValue VectorTripCount;
4374
4375 /// Represents the vectorization factor of the loop.
4376 VPSymbolicValue VF;
4377
4378 /// Represents the loop-invariant VF * UF of the vector loop region.
4379 VPSymbolicValue VFxUF;
4380
4381 /// Contains all the external definitions created for this VPlan, as a mapping
4382 /// from IR Values to VPIRValues.
4384
4385 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4386 /// VPlan is destroyed.
4387 SmallVector<VPBlockBase *> CreatedBlocks;
4388
4389 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4390 /// wrapping the original header of the scalar loop.
4391 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4392 : Entry(Entry), ScalarHeader(ScalarHeader) {
4393 Entry->setPlan(this);
4394 assert(ScalarHeader->getNumSuccessors() == 0 &&
4395 "scalar header must be a leaf node");
4396 }
4397
4398public:
4399 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4400 /// original preheader and scalar header of \p L, to be used as entry and
4401 /// scalar header blocks of the new VPlan.
4402 VPlan(Loop *L);
4403
4404 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4405 /// wrapping \p ScalarHeaderBB.
4406 VPlan(BasicBlock *ScalarHeaderBB) {
4407 setEntry(createVPBasicBlock("preheader"));
4408 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4409 }
4410
4412
4414 Entry = VPBB;
4415 VPBB->setPlan(this);
4416 }
4417
4418 /// Generate the IR code for this VPlan.
4419 void execute(VPTransformState *State);
4420
4421 /// Return the cost of this plan.
4423
4424 VPBasicBlock *getEntry() { return Entry; }
4425 const VPBasicBlock *getEntry() const { return Entry; }
4426
4427 /// Returns the preheader of the vector loop region, if one exists, or null
4428 /// otherwise.
4430 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4431 return VectorRegion
4432 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4433 : nullptr;
4434 }
4435
4436 /// Returns the VPRegionBlock of the vector loop.
4439
4440 /// Returns the 'middle' block of the plan, that is the block that selects
4441 /// whether to execute the scalar tail loop or the exit block from the loop
4442 /// latch. If there is an early exit from the vector loop, the middle block
4443 /// conceptually has the early exit block as third successor, split across 2
4444 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4445 /// tail loop or the exit block. If the scalar tail loop or exit block are
4446 /// known to always execute, the middle block may branch directly to that
4447 /// block. This function cannot be called once the vector loop region has been
4448 /// removed.
4450 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4451 assert(
4452 LoopRegion &&
4453 "cannot call the function after vector loop region has been removed");
4454 // The middle block is always the last successor of the region.
4455 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4456 }
4457
4459 return const_cast<VPlan *>(this)->getMiddleBlock();
4460 }
4461
4462 /// Return the VPBasicBlock for the preheader of the scalar loop.
4464 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4465 }
4466
4467 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4468 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4469
4470 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4471 /// the original scalar loop.
4472 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4473
4474 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4475 /// exit block.
4477
4478 /// Returns true if \p VPBB is an exit block.
4479 bool isExitBlock(VPBlockBase *VPBB);
4480
4481 /// The trip count of the original loop.
4483 assert(TripCount && "trip count needs to be set before accessing it");
4484 return TripCount;
4485 }
4486
4487 /// Set the trip count assuming it is currently null; if it is not - use
4488 /// resetTripCount().
4489 void setTripCount(VPValue *NewTripCount) {
4490 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4491 TripCount = NewTripCount;
4492 }
4493
4494 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4495 /// the original trip count have been replaced.
4496 void resetTripCount(VPValue *NewTripCount) {
4497 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4498 "TripCount must be set when resetting");
4499 TripCount = NewTripCount;
4500 }
4501
4502 /// The backedge taken count of the original loop.
4504 if (!BackedgeTakenCount)
4505 BackedgeTakenCount = new VPSymbolicValue();
4506 return BackedgeTakenCount;
4507 }
4508 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4509
4510 /// The vector trip count.
4511 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4512
4513 /// Returns the VF of the vector loop region.
4514 VPValue &getVF() { return VF; };
4515 const VPValue &getVF() const { return VF; };
4516
4517 /// Returns VF * UF of the vector loop region.
4518 VPValue &getVFxUF() { return VFxUF; }
4519
4522 }
4523
4524 void addVF(ElementCount VF) { VFs.insert(VF); }
4525
4527 assert(hasVF(VF) && "Cannot set VF not already in plan");
4528 VFs.clear();
4529 VFs.insert(VF);
4530 }
4531
4532 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4533 bool hasScalableVF() const {
4534 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4535 }
4536
4537 /// Returns an iterator range over all VFs of the plan.
4540 return VFs;
4541 }
4542
4543 bool hasScalarVFOnly() const {
4544 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4545 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4546 "Plan with scalar VF should only have a single VF");
4547 return HasScalarVFOnly;
4548 }
4549
4550 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4551
4552 unsigned getUF() const {
4553 assert(UFs.size() == 1 && "Expected a single UF");
4554 return UFs[0];
4555 }
4556
4557 void setUF(unsigned UF) {
4558 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4559 UFs.clear();
4560 UFs.insert(UF);
4561 }
4562
4563 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4564 /// concrete UF.
4565 bool isUnrolled() const { return UFs.size() == 1; }
4566
4567 /// Return a string with the name of the plan and the applicable VFs and UFs.
4568 std::string getName() const;
4569
4570 void setName(const Twine &newName) { Name = newName.str(); }
4571
4572 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4573 /// yet) for \p V.
4575 assert(V && "Trying to get or add the VPIRValue of a null Value");
4576 auto [It, Inserted] = LiveIns.try_emplace(V);
4577 if (Inserted) {
4578 if (auto *CI = dyn_cast<ConstantInt>(V))
4579 It->second = new VPConstantInt(CI);
4580 else
4581 It->second = new VPIRValue(V);
4582 }
4583
4584 assert(isa<VPIRValue>(It->second) &&
4585 "Only VPIRValues should be in mapping");
4586 return It->second;
4587 }
4589 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4590 return getOrAddLiveIn(V->getValue());
4591 }
4592
4593 /// Return a VPIRValue wrapping i1 true.
4594 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4595
4596 /// Return a VPIRValue wrapping i1 false.
4597 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4598
4599 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4600 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4601 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4602 }
4603
4604 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4605 /// value.
4607 bool IsSigned = false) {
4608 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4609 }
4610
4611 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4613 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4614 }
4615
4616 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4617 /// otherwise.
4618 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4619
4620 /// Return the list of live-in VPValues available in the VPlan.
4621 auto getLiveIns() const { return LiveIns.values(); }
4622
4623#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4624 /// Print the live-ins of this VPlan to \p O.
4625 void printLiveIns(raw_ostream &O) const;
4626
4627 /// Print this VPlan to \p O.
4628 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4629
4630 /// Print this VPlan in DOT format to \p O.
4631 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4632
4633 /// Dump the plan to stderr (for debugging).
4634 LLVM_DUMP_METHOD void dump() const;
4635#endif
4636
4637 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4638 /// recipes to refer to the clones, and return it.
4640
4641 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4642 /// present. The returned block is owned by the VPlan and deleted once the
4643 /// VPlan is destroyed.
4645 VPRecipeBase *Recipe = nullptr) {
4646 auto *VPB = new VPBasicBlock(Name, Recipe);
4647 CreatedBlocks.push_back(VPB);
4648 return VPB;
4649 }
4650
4651 /// Create a new loop region with \p Name and entry and exiting blocks set
4652 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4653 /// owned by the VPlan and deleted once the VPlan is destroyed.
4654 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4655 VPBlockBase *Entry = nullptr,
4656 VPBlockBase *Exiting = nullptr) {
4657 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4658 : new VPRegionBlock(Name);
4659 CreatedBlocks.push_back(VPB);
4660 return VPB;
4661 }
4662
4663 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4664 /// returned block is owned by the VPlan and deleted once the VPlan is
4665 /// destroyed.
4667 const std::string &Name = "") {
4668 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4669 CreatedBlocks.push_back(VPB);
4670 return VPB;
4671 }
4672
4673 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4674 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4675 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4677
4678 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4679 /// instructions in \p IRBB, except its terminator which is managed by the
4680 /// successors of the block in VPlan. The returned block is owned by the VPlan
4681 /// and deleted once the VPlan is destroyed.
4683
4684 /// Returns true if the VPlan is based on a loop with an early exit. That is
4685 /// the case if the VPlan has either more than one exit block or a single exit
4686 /// block with multiple predecessors (one for the exit via the latch and one
4687 /// via the other early exit).
4688 bool hasEarlyExit() const {
4689 return count_if(ExitBlocks,
4690 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4691 1 ||
4692 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4693 }
4694
4695 /// Returns true if the scalar tail may execute after the vector loop. Note
4696 /// that this relies on unneeded branches to the scalar tail loop being
4697 /// removed.
4698 bool hasScalarTail() const {
4699 return !(!getScalarPreheader()->hasPredecessors() ||
4701 }
4702};
4703
4704#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4705inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4706 Plan.print(OS);
4707 return OS;
4708}
4709#endif
4710
4711} // end namespace llvm
4712
4713#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1079
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3658
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3652
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4017
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4045
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4092
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4047
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4044
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4070
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4028
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4034
iterator end()
Definition VPlan.h:4054
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4052
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4046
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4105
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:782
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:228
~VPBasicBlock() override
Definition VPlan.h:4038
const_reverse_iterator rbegin() const
Definition VPlan.h:4058
reverse_iterator rend()
Definition VPlan.h:4059
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4032
VPRecipeBase & back()
Definition VPlan.h:4067
const VPRecipeBase & front() const
Definition VPlan.h:4064
const_iterator begin() const
Definition VPlan.h:4053
VPRecipeBase & front()
Definition VPlan.h:4065
const VPRecipeBase & back() const
Definition VPlan.h:4066
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4083
bool empty() const
Definition VPlan.h:4063
const_iterator end() const
Definition VPlan.h:4055
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4078
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4073
reverse_iterator rbegin()
Definition VPlan.h:4057
friend class VPlan
Definition VPlan.h:4018
size_t size() const
Definition VPlan.h:4062
const_reverse_iterator rend() const
Definition VPlan.h:4060
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2585
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2590
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2580
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2601
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2610
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2567
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2562
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2596
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2576
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:657
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:220
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:192
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:212
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:178
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3082
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3066
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3090
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3063
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3593
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
Definition VPlan.h:3595
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3620
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3600
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3627
VPIRValue * getStartValue() const
Returns the start value of the canonical induction.
Definition VPlan.h:3614
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3617
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3608
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3634
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:356
VPDef(const unsigned char SC)
Definition VPlanValue.h:435
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPIRValue * getStartValue() const
Definition VPlan.h:3808
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3800
VPValue * getStepValue() const
Definition VPlan.h:3809
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3772
Type * getScalarType() const
Definition VPlan.h:3806
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3788
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3812
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3780
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3708
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3689
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3695
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3701
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3684
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3568
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3573
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3559
const SCEV * getSCEV() const
Definition VPlan.h:3579
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3564
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3217
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3199
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3181
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3169
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3155
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3147
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3151
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3211
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3149
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2080
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2093
static bool classof(const VPValue *V)
Definition VPlan.h:2090
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2116
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2121
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2105
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2113
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2086
VPValue * getStartValue() const
Definition VPlan.h:2108
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2125
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2075
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1839
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1856
unsigned getOpcode() const
Definition VPlan.h:1852
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1833
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4170
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:457
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4194
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4184
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4171
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:482
Class to record and manage LLVM IR flags.
Definition VPlan.h:608
FastMathFlagsTy FMFs
Definition VPlan.h:695
ReductionFlagsTy ReductionFlags
Definition VPlan.h:697
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:765
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:756
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:748
WrapFlagsTy WrapFlags
Definition VPlan.h:689
CmpInst::Predicate CmpPredicate
Definition VPlan.h:688
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:742
GEPNoWrapFlags GEPFlags
Definition VPlan.h:693
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:881
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:931
TruncFlagsTy TruncFlags
Definition VPlan.h:690
CmpInst::Predicate getPredicate() const
Definition VPlan.h:858
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:889
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:769
ExactFlagsTy ExactFlags
Definition VPlan.h:692
bool hasNoSignedWrap() const
Definition VPlan.h:908
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:919
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:751
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:754
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:759
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:739
bool isNonNeg() const
Definition VPlan.h:891
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:873
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:876
DisjointFlagsTy DisjointFlags
Definition VPlan.h:691
unsigned AllFlags
Definition VPlan.h:698
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:864
bool hasNoUnsignedWrap() const
Definition VPlan.h:897
FCmpFlagsTy FCmpFlags
Definition VPlan.h:696
NonNegFlagsTy NonNegFlags
Definition VPlan.h:694
bool isReductionInLoop() const
Definition VPlan.h:937
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:779
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:816
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:762
RecurKind getRecurKind() const
Definition VPlan.h:925
VPIRFlags(Instruction &I)
Definition VPlan.h:704
Instruction & getInstruction() const
Definition VPlan.h:1510
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1518
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1497
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1524
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1512
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1485
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1031
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1067
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1039
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1051
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1324
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1365
static bool classof(const VPUser *R)
Definition VPlan.h:1350
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1332
Type * getResultType() const
Definition VPlan.h:1371
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1354
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1085
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1236
@ ExtractLastActive
Extracts the lane from the first operand corresponding to the last active (non-zero) lane in the mask...
Definition VPlan.h:1193
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1186
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1132
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1176
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1189
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1129
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1180
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1124
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1121
@ VScale
Returns the value for vscale.
Definition VPlan.h:1196
@ CanonicalIVIncrementForPart
Definition VPlan.h:1105
bool hasResult() const
Definition VPlan.h:1260
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1301
unsigned getOpcode() const
Definition VPlan.h:1244
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1304
friend class VPlanSlp
Definition VPlan.h:1086
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2697
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2703
static bool classof(const VPUser *U)
Definition VPlan.h:2679
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2645
Instruction * getInsertPos() const
Definition VPlan.h:2701
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2674
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2699
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2691
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2720
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2685
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2773
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2801
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2795
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2808
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2788
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2775
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
Definition VPlan.h:2731
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2758
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2741
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2752
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2733
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1383
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1405
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1400
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4161
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1425
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1392
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1410
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1414
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3274
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3256
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3267
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition VPlan.h:3252
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4322
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:235
friend class VPDef
Definition VPlanValue.h:237
LLVM_ABI_FOR_TEST VPRecipeValue(VPDef *Def, Value *UV=nullptr)
Definition VPlan.cpp:139
friend class VPValue
Definition VPlanValue.h:236
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2958
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2937
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2961
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2948
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2523
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2509
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2488
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2502
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2535
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2517
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2526
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2540
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2477
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2532
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2520
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2824
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2833
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2900
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2869
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2884
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2911
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2913
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2896
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2847
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2898
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2854
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2902
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2909
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2904
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2863
static bool classof(const VPUser *U)
Definition VPlan.h:2874
static bool classof(const VPValue *VPV)
Definition VPlan.h:2879
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2918
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4205
const VPBlockBase * getEntry() const
Definition VPlan.h:4241
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4316
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4273
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4258
VPBlockBase * getExiting()
Definition VPlan.h:4254
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4303
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4246
const Type * getCanonicalIVType() const
Definition VPlan.h:4317
const VPBlockBase * getExiting() const
Definition VPlan.h:4253
VPBlockBase * getEntry()
Definition VPlan.h:4242
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4311
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4266
friend class VPlan
Definition VPlan.h:4206
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4237
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2980
bool isSingleScalar() const
Definition VPlan.h:3021
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2988
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3033
bool isPredicated() const
Definition VPlan.h:3023
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3002
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3026
unsigned getOpcode() const
Definition VPlan.h:3050
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3045
VPValue * getStepValue() const
Definition VPlan.h:3875
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3869
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3844
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3856
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:3883
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:3879
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3837
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3888
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:594
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:597
static bool classof(const VPUser *U)
Definition VPlan.h:586
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1019
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:253
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1428
operand_range operands()
Definition VPlanValue.h:321
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:297
unsigned getNumOperands() const
Definition VPlanValue.h:291
operand_iterator op_end()
Definition VPlanValue.h:319
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:292
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:272
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:315
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:314
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:47
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:133
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:119
friend class VPRecipeValue
Definition VPlanValue.h:53
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:175
unsigned getNumUsers() const
Definition VPlanValue.h:107
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1955
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1976
const VPValue * getVFValue() const
Definition VPlan.h:1951
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1969
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1962
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1940
Type * getSourceElementType() const
Definition VPlan.h:2010
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2012
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2019
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1997
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2035
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2026
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1773
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1780
const_operand_range args() const
Definition VPlan.h:1813
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1794
operand_range args()
Definition VPlan.h:1812
Function * getCalledScalarFunction() const
Definition VPlan.h:1808
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3744
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3731
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3726
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1623
Instruction::CastOps getOpcode() const
Definition VPlan.h:1659
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1662
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1631
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1644
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1903
Type * getSourceElementType() const
Definition VPlan.h:1908
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1911
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1895
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1881
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2205
static bool classof(const VPValue *V)
Definition VPlan.h:2156
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2175
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2190
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2168
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2183
PHINode * getPHINode() const
Definition VPlan.h:2185
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2144
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2171
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2188
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2197
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2151
const VPValue * getVFValue() const
Definition VPlan.h:2178
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2161
const VPValue * getStepValue() const
Definition VPlan.h:2172
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2266
const TruncInst * getTruncInst() const
Definition VPlan.h:2282
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2260
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2270
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2252
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2226
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2281
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2235
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2297
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2277
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2290
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1673
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1704
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1744
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1753
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1690
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1759
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1725
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1756
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1747
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3305
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3302
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3345
static bool classof(const VPUser *U)
Definition VPlan.h:3339
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3368
Instruction & Ingredient
Definition VPlan.h:3293
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3328
Instruction & getIngredient() const
Definition VPlan.h:3376
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3299
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3332
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3359
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3296
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3355
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3315
void setMask(VPValue *Mask)
Definition VPlan.h:3307
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3365
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3352
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3349
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2395
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2362
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2369
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2324
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2333
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2314
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1575
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1589
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1614
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1579
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1604
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4335
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4618
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1117
friend class VPSlotTracker
Definition VPlan.h:4337
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1093
bool hasVF(ElementCount VF) const
Definition VPlan.h:4532
LLVMContext & getContext() const
Definition VPlan.h:4520
VPBasicBlock * getEntry()
Definition VPlan.h:4424
void setName(const Twine &newName)
Definition VPlan.h:4570
bool hasScalableVF() const
Definition VPlan.h:4533
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4518
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4514
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4482
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4503
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4539
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:901
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:879
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4588
const VPValue & getVF() const
Definition VPlan.h:4515
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:909
const VPBasicBlock * getEntry() const
Definition VPlan.h:4425
friend class VPlanPrinter
Definition VPlan.h:4336
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4597
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4612
unsigned getUF() const
Definition VPlan.h:4552
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4666
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1228
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4621
bool hasUF(unsigned UF) const
Definition VPlan.h:4550
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4472
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4511
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4508
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4574
void setVF(ElementCount VF)
Definition VPlan.h:4526
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4565
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1022
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4688
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1004
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4606
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4458
void setTripCount(VPValue *NewTripCount)
Set the trip count, assuming it is currently null; if it is not, use resetTripCount() instead.
Definition VPlan.h:4489
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4496
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4449
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4413
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4644
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1234
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4594
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4654
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1123
bool hasScalarVFOnly() const
Definition VPlan.h:4543
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4463
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:916
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1076
void addVF(ElementCount VF)
Definition VPlan.h:4524
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4468
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1038
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4429
void setUF(unsigned UF)
Definition VPlan.h:4557
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4698
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1164
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4406
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4600
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2497
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3951
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2449
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2544
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2009
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2447
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3982
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3996
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:4001
static bool isPossible(SrcTy R)
Definition VPlan.h:3983
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3913
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3934
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3915
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3918
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3905
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2441
Possible variants of a reduction.
Definition VPlan.h:2439
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2444
unsigned VFScaleFactor
Definition VPlan.h:2445
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:204
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2410
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2422
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2402
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1556
VPIRPhi(PHINode &PN)
Definition VPlan.h:1549
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1551
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1567
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:186
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:137
static bool classof(const VPUser *U)
Definition VPlan.h:1443
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1458
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1473
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1440
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1453
static bool classof(const VPValue *V)
Definition VPlan.h:1448
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:973
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:979
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:974
static bool classof(const VPValue *V)
Definition VPlan.h:998
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:1005
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:993
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:226
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3424
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3437
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3425
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3447
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3382
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3404
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3383
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3392
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3508
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3520
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3509
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3533
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3523
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3465
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3483
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3474
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3489
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3466