LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47
48namespace llvm {
49
50class BasicBlock;
51class DominatorTree;
53class IRBuilderBase;
54struct VPTransformState;
55class raw_ostream;
57class SCEV;
58class Type;
59class VPBasicBlock;
60class VPBuilder;
61class VPDominatorTree;
62class VPRegionBlock;
63class VPlan;
64class VPLane;
66class VPlanSlp;
67class Value;
69
70struct VPCostContext;
71
72namespace Intrinsic {
73typedef unsigned ID;
74}
75
76using VPlanPtr = std::unique_ptr<VPlan>;
77
78/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
79/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
81 friend class VPBlockUtils;
82
83 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
84
85 /// An optional name for the block.
86 std::string Name;
87
88 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
89 /// it is a topmost VPBlockBase.
90 VPRegionBlock *Parent = nullptr;
91
92 /// List of predecessor blocks.
94
95 /// List of successor blocks.
97
98 /// VPlan containing the block. Can only be set on the entry block of the
99 /// plan.
100 VPlan *Plan = nullptr;
101
102 /// Append \p Successor to the end of this block's successor list.
/// \p Successor must be non-null (asserted). Only this block's list changes.
103 void appendSuccessor(VPBlockBase *Successor) {
104 assert(Successor != nullptr && "Cannot add nullptr successor!");
105 Successors.push_back(Successor);
106 }
107
108 /// Append \p Predecessor to the end of this block's predecessor list.
/// \p Predecessor must be non-null (asserted). Only this block's list changes.
109 void appendPredecessor(VPBlockBase *Predecessor) {
110 assert(Predecessor != nullptr && "Cannot add nullptr predecessor!");
111 Predecessors.push_back(Predecessor);
112 }
113
114 /// Remove \p Predecessor from the predecessors of this block.
/// \p Predecessor must currently be in the predecessor list (asserted). Only
/// this block's list is modified.
115 void removePredecessor(VPBlockBase *Predecessor) {
116 auto Pos = find(Predecessors, Predecessor);
// A failed search yields end(), so compare against end() (as
// replacePredecessor does) rather than testing the iterator for truthiness.
117 assert(Pos != Predecessors.end() && "Predecessor does not exist");
118 Predecessors.erase(Pos);
119 }
120
121 /// Remove \p Successor from the successors of this block.
/// \p Successor must currently be in the successor list (asserted). Only
/// this block's list is modified.
122 void removeSuccessor(VPBlockBase *Successor) {
123 auto Pos = find(Successors, Successor);
// A failed search yields end(), so compare against end() (as
// replaceSuccessor does) rather than testing the iterator for truthiness.
124 assert(Pos != Successors.end() && "Successor does not exist");
125 Successors.erase(Pos);
126 }
127
128 /// Replace predecessor \p Old with \p New, keeping its position in the
129 /// predecessor list; useful when swapping an old block in the CFG for a new
/// one. \p Old must be an existing predecessor and must have the same parent
/// as \p New (both asserted).
130 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
131 auto I = find(Predecessors, Old);
132 assert(I != Predecessors.end() && "Old must be an existing predecessor");
133 assert(Old->getParent() == New->getParent() &&
134 "replaced predecessor must have the same parent");
// Overwrite the slot in place so the predecessor order is unchanged.
135 *I = New;
136 }
137
138 /// Replace successor \p Old with \p New, keeping its position in the
139 /// successor list; useful when swapping an old block in the CFG for a new
/// one. \p Old must be an existing successor and must have the same parent
/// as \p New (both asserted).
140 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
141 auto I = find(Successors, Old);
142 assert(I != Successors.end() && "Old must be an existing successor");
143 assert(Old->getParent() == New->getParent() &&
144 "replaced successor must have the same parent");
// Overwrite the slot in place so the successor order is unchanged.
145 *I = New;
146 }
147
148protected:
149 VPBlockBase(const unsigned char SC, const std::string &N)
150 : SubclassID(SC), Name(N) {}
151
152public:
153 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
154 /// that are actually instantiated. Values of this enumeration are kept in the
155 /// SubclassID field of the VPBlockBase objects. They are used for concrete
156 /// type identification.
157 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
158
160
161 virtual ~VPBlockBase() = default;
162
163 const std::string &getName() const { return Name; }
164
165 void setName(const Twine &newName) { Name = newName.str(); }
166
167 /// \return an ID for the concrete type of this object.
168 /// This is used to implement the classof checks. This should not be used
169 /// for any other purpose, as the values may change as LLVM evolves.
170 unsigned getVPBlockID() const { return SubclassID; }
171
172 VPRegionBlock *getParent() { return Parent; }
173 const VPRegionBlock *getParent() const { return Parent; }
174
175 /// \return A pointer to the plan containing the current block.
176 VPlan *getPlan();
177 const VPlan *getPlan() const;
178
179 /// Sets the pointer of the plan containing the block. The block must be the
180 /// entry block into the VPlan.
181 void setPlan(VPlan *ParentPlan);
182
183 void setParent(VPRegionBlock *P) { Parent = P; }
184
185 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
186 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
187 /// VPBlockBase is a VPBasicBlock, it is returned.
188 const VPBasicBlock *getEntryBasicBlock() const;
189 VPBasicBlock *getEntryBasicBlock();
190
191 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
192 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
193 /// VPBlockBase is a VPBasicBlock, it is returned.
194 const VPBasicBlock *getExitingBasicBlock() const;
195 VPBasicBlock *getExitingBasicBlock();
196
197 const VPBlocksTy &getSuccessors() const { return Successors; }
198 VPBlocksTy &getSuccessors() { return Successors; }
199
202
203 const VPBlocksTy &getPredecessors() const { return Predecessors; }
204 VPBlocksTy &getPredecessors() { return Predecessors; }
205
206 /// \return the successor of this VPBlockBase if it has a single successor.
207 /// Otherwise return a null pointer.
209 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
210 }
211
212 /// \return the predecessor of this VPBlockBase if it has a single
213 /// predecessor. Otherwise return a null pointer.
215 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
216 }
217
218 size_t getNumSuccessors() const { return Successors.size(); }
219 size_t getNumPredecessors() const { return Predecessors.size(); }
220
221 /// Returns true if this block has any predecessors.
222 bool hasPredecessors() const { return !Predecessors.empty(); }
223
224 /// An Enclosing Block of a block B is any block containing B, including B
225 /// itself. \return the closest enclosing block starting from "this", which
226 /// has successors. \return the root enclosing block if all enclosing blocks
227 /// have no successors.
228 VPBlockBase *getEnclosingBlockWithSuccessors();
229
230 /// \return the closest enclosing block starting from "this", which has
231 /// predecessors. \return the root enclosing block if all enclosing blocks
232 /// have no predecessors.
233 VPBlockBase *getEnclosingBlockWithPredecessors();
234
235 /// \return the successors either attached directly to this VPBlockBase or, if
236 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
237 /// successors of its own, search recursively for the first enclosing
238 /// VPRegionBlock that has successors and return them. If no such
239 /// VPRegionBlock exists, return the (empty) successors of the topmost
240 /// VPBlockBase reached.
242 return getEnclosingBlockWithSuccessors()->getSuccessors();
243 }
244
245 /// \return the hierarchical successor of this VPBlockBase if it has a single
246 /// hierarchical successor. Otherwise return a null pointer.
248 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
249 }
250
251 /// \return the predecessors either attached directly to this VPBlockBase or,
252 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
253 /// predecessors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has predecessors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithPredecessors()->getPredecessors();
259 }
260
261 /// \return the hierarchical predecessor of this VPBlockBase if it has a
262 /// single hierarchical predecessor. Otherwise return a null pointer.
266
267 /// Set a given VPBlockBase \p Successor as the single successor of this
268 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
269 /// This VPBlockBase must have no successors.
271 assert(Successors.empty() && "Setting one successor when others exist.");
272 assert(Successor->getParent() == getParent() &&
273 "connected blocks must have the same parent");
274 appendSuccessor(Successor);
275 }
276
277 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
278 /// successors of this VPBlockBase, in that order (\p IfTrue is appended
279 /// first, \p IfFalse second). This VPBlockBase is not added as predecessor
280 /// of \p IfTrue or \p IfFalse. This VPBlockBase must have no successors.
/// Both blocks must be non-null; appendSuccessor asserts this.
281 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
282 assert(Successors.empty() && "Setting two successors when others exist.");
283 appendSuccessor(IfTrue);
284 appendSuccessor(IfFalse);
285 }
286
287 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
288 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
289 /// as successor of any VPBasicBlock in \p NewPreds.
291 assert(Predecessors.empty() && "Block predecessors already set.");
292 for (auto *Pred : NewPreds)
293 appendPredecessor(Pred);
294 }
295
296 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
297 /// This VPBlockBase must have no successors. This VPBlockBase is not added
298 /// as predecessor of any VPBasicBlock in \p NewSuccs.
300 assert(Successors.empty() && "Block successors already set.");
301 for (auto *Succ : NewSuccs)
302 appendSuccessor(Succ);
303 }
304
305 /// Remove all the predecessors of this block.
306 void clearPredecessors() { Predecessors.clear(); }
307
308 /// Remove all the successors of this block.
309 void clearSuccessors() { Successors.clear(); }
310
311 /// Swap predecessors of the block. The block must have exactly 2
312 /// predecessors.
314 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
315 std::swap(Predecessors[0], Predecessors[1]);
316 }
317
318 /// Swap successors of the block. The block must have exactly 2 successors.
319 // TODO: This should be part of introducing conditional branch recipes rather
320 // than being independent.
322 assert(Successors.size() == 2 && "must have 2 successors to swap");
323 std::swap(Successors[0], Successors[1]);
324 }
325
326 /// Returns the index for \p Pred in the block's predecessors list.
/// \p Pred must occur exactly once in that list (asserted); the result is the
/// distance from the start of the list to the first match.
327 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
328 assert(count(Predecessors, Pred) == 1 &&
329 "must have Pred exactly once in Predecessors");
330 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
331 }
332
333 /// Returns the index for \p Succ in the block's successor list.
/// \p Succ must occur exactly once in that list (asserted); the result is the
/// distance from the start of the list to the first match.
334 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
335 assert(count(Successors, Succ) == 1 &&
336 "must have Succ exactly once in Successors");
337 return std::distance(Successors.begin(), find(Successors, Succ));
338 }
339
340 /// The method which generates the output IR that correspond to this
341 /// VPBlockBase, thereby "executing" the VPlan.
342 virtual void execute(VPTransformState *State) = 0;
343
344 /// Return the cost of the block.
346
347#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
348 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
// Writes only the block's name to OS; PrintType is not consulted here.
349 OS << getName();
350 }
351
352 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
353 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
354 /// consecutive numbers.
355 ///
356 /// Note that the numbering is applied to the whole VPlan, so printing
357 /// individual blocks is consistent with the whole VPlan printing.
358 virtual void print(raw_ostream &O, const Twine &Indent,
359 VPSlotTracker &SlotTracker) const = 0;
360
361 /// Print plain-text dump of this VPBlockBase to \p O.
362 void print(raw_ostream &O) const;
363
364 /// Print the successors of this block to \p O, prefixing all lines with \p
365 /// Indent.
366 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
367
368 /// Dump this VPBlockBase to dbgs().
369 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
370#endif
371
372 /// Clone the current block and its recipes without updating the operands of
373 /// the cloned recipes, including all blocks in the single-entry single-exit
374 /// region for VPRegionBlocks.
375 virtual VPBlockBase *clone() = 0;
376};
377
378/// VPRecipeBase is a base class modeling a sequence of one or more output IR
379/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
380/// and is responsible for deleting its defined values. Single-value
381 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
382/// VPRecipeBase and VPValue separately.
384 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
385 public VPDef,
386 public VPUser {
387 friend VPBasicBlock;
388 friend class VPBlockUtils;
389
390 /// Each VPRecipe belongs to a single VPBasicBlock.
391 VPBasicBlock *Parent = nullptr;
392
393 /// The debug location for the recipe.
394 DebugLoc DL;
395
396public:
397 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
399 : VPDef(SC), VPUser(Operands), DL(DL) {}
400
401 ~VPRecipeBase() override = default;
402
403 /// Clone the current recipe.
404 virtual VPRecipeBase *clone() = 0;
405
406 /// \return the VPBasicBlock which this VPRecipe belongs to.
407 VPBasicBlock *getParent() { return Parent; }
408 const VPBasicBlock *getParent() const { return Parent; }
409
410 /// \return the VPRegionBlock which the recipe belongs to.
411 VPRegionBlock *getRegion();
412 const VPRegionBlock *getRegion() const;
413
414 /// The method which generates the output IR instructions that correspond to
415 /// this VPRecipe, thereby "executing" the VPlan.
416 virtual void execute(VPTransformState &State) = 0;
417
418 /// Return the cost of this recipe, taking into account if the cost
419 /// computation should be skipped and the ForceTargetInstructionCost flag.
420 /// Also takes care of printing the cost for debugging.
422
423 /// Insert an unlinked recipe into a basic block immediately before
424 /// the specified recipe.
425 void insertBefore(VPRecipeBase *InsertPos);
426 /// Insert an unlinked recipe into \p BB immediately before the insertion
427 /// point \p IP;
428 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
429
430 /// Insert an unlinked Recipe into a basic block immediately after
431 /// the specified Recipe.
432 void insertAfter(VPRecipeBase *InsertPos);
433
434 /// Unlink this recipe from its current VPBasicBlock and insert it into
435 /// the VPBasicBlock that MovePos lives in, right after MovePos.
436 void moveAfter(VPRecipeBase *MovePos);
437
438 /// Unlink this recipe and insert into BB before I.
439 ///
440 /// \pre I is a valid iterator into BB.
441 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
442
443 /// This method unlinks 'this' from the containing basic block, but does not
444 /// delete it.
445 void removeFromParent();
446
447 /// This method unlinks 'this' from the containing basic block and deletes it.
448 ///
449 /// \returns an iterator pointing to the element after the erased one
451
452 /// Method to support type inquiry through isa, cast, and dyn_cast.
453 static inline bool classof(const VPDef *D) {
454 // All VPDefs are also VPRecipeBases.
455 return true;
456 }
457
458 static inline bool classof(const VPUser *U) { return true; }
459
460 /// Returns true if the recipe may have side-effects.
461 bool mayHaveSideEffects() const;
462
463 /// Returns true for PHI-like recipes.
464 bool isPhi() const;
465
466 /// Returns true if the recipe may read from memory.
467 bool mayReadFromMemory() const;
468
469 /// Returns true if the recipe may write to memory.
470 bool mayWriteToMemory() const;
471
472 /// Returns true if the recipe may read from or write to memory.
473 bool mayReadOrWriteMemory() const {
475 }
476
477 /// Returns the debug location of the recipe.
478 DebugLoc getDebugLoc() const { return DL; }
479
480 /// Return true if the recipe is a scalar cast.
481 bool isScalarCast() const;
482
483 /// Set the recipe's debug location to \p NewDL.
484 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
485
486#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
487 /// Print the recipe, delegating to printRecipe().
488 void print(raw_ostream &O, const Twine &Indent,
489 VPSlotTracker &SlotTracker) const override final;
490#endif
491
492protected:
493 /// Compute the cost of this recipe either using a recipe's specialized
494 /// implementation or using the legacy cost model and the underlying
495 /// instructions.
496 virtual InstructionCost computeCost(ElementCount VF,
497 VPCostContext &Ctx) const;
498
499#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
500 /// Each concrete VPRecipe prints itself, without printing common information,
501 /// like debug info or metadata.
502 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
503 VPSlotTracker &SlotTracker) const = 0;
504#endif
505};
506
507// Helper macro to define common classof implementations for recipes.
// Expands to classof() overloads taking VPDef, VPValue, VPUser, VPRecipeBase
// and VPSingleDefRecipe pointers; each returns true iff the associated
// recipe's VPDef ID equals \p VPDefID. The VPValue overload returns false when
// the value has no defining recipe, and the VPUser overload returns false when
// the user is not a VPRecipeBase.
508#define VP_CLASSOF_IMPL(VPDefID) \
509 static inline bool classof(const VPDef *D) { \
510 return D->getVPDefID() == VPDefID; \
511 } \
512 static inline bool classof(const VPValue *V) { \
513 auto *R = V->getDefiningRecipe(); \
514 return R && R->getVPDefID() == VPDefID; \
515 } \
516 static inline bool classof(const VPUser *U) { \
517 auto *R = dyn_cast<VPRecipeBase>(U); \
518 return R && R->getVPDefID() == VPDefID; \
519 } \
520 static inline bool classof(const VPRecipeBase *R) { \
521 return R->getVPDefID() == VPDefID; \
522 } \
523 static inline bool classof(const VPSingleDefRecipe *R) { \
524 return R->getVPDefID() == VPDefID; \
525 }
526
527/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
528/// more output IR instructions that define a single result VPValue.
529/// Note that VPRecipeBase must be inherited from before VPValue.
530class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
531public:
532 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
534 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
535
536 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
538 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
539
/// Method to support type inquiry through isa, cast, and dyn_cast.
/// Returns true iff the VPDef ID of \p R is in the first case group below;
/// IDs in the second group return false. Any ID not listed in either group
/// reaches llvm_unreachable, so a newly added recipe ID must be added to one
/// of the two groups.
540 static inline bool classof(const VPRecipeBase *R) {
541 switch (R->getVPDefID()) {
// Recipe kinds treated as VPSingleDefRecipes.
542 case VPRecipeBase::VPDerivedIVSC:
543 case VPRecipeBase::VPEVLBasedIVPHISC:
544 case VPRecipeBase::VPExpandSCEVSC:
545 case VPRecipeBase::VPExpressionSC:
546 case VPRecipeBase::VPInstructionSC:
547 case VPRecipeBase::VPReductionEVLSC:
548 case VPRecipeBase::VPReductionSC:
549 case VPRecipeBase::VPReplicateSC:
550 case VPRecipeBase::VPScalarIVStepsSC:
551 case VPRecipeBase::VPVectorPointerSC:
552 case VPRecipeBase::VPVectorEndPointerSC:
553 case VPRecipeBase::VPWidenCallSC:
554 case VPRecipeBase::VPWidenCanonicalIVSC:
555 case VPRecipeBase::VPWidenCastSC:
556 case VPRecipeBase::VPWidenGEPSC:
557 case VPRecipeBase::VPWidenIntrinsicSC:
558 case VPRecipeBase::VPWidenSC:
559 case VPRecipeBase::VPWidenSelectSC:
560 case VPRecipeBase::VPBlendSC:
561 case VPRecipeBase::VPPredInstPHISC:
562 case VPRecipeBase::VPCanonicalIVPHISC:
563 case VPRecipeBase::VPActiveLaneMaskPHISC:
564 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565 case VPRecipeBase::VPWidenPHISC:
566 case VPRecipeBase::VPWidenIntOrFpInductionSC:
567 case VPRecipeBase::VPWidenPointerInductionSC:
568 case VPRecipeBase::VPReductionPHISC:
569 case VPRecipeBase::VPPartialReductionSC:
570 return true;
// Recipe kinds that are not VPSingleDefRecipes.
571 case VPRecipeBase::VPBranchOnMaskSC:
572 case VPRecipeBase::VPInterleaveEVLSC:
573 case VPRecipeBase::VPInterleaveSC:
574 case VPRecipeBase::VPIRInstructionSC:
575 case VPRecipeBase::VPWidenLoadEVLSC:
576 case VPRecipeBase::VPWidenLoadSC:
577 case VPRecipeBase::VPWidenStoreEVLSC:
578 case VPRecipeBase::VPWidenStoreSC:
579 case VPRecipeBase::VPHistogramSC:
580 // TODO: Widened stores don't define a value, but widened loads do. Split
581 // the recipes to be able to make widened loads VPSingleDefRecipes.
582 return false;
583 }
584 llvm_unreachable("Unhandled VPDefID");
585 }
586
/// Type-inquiry overload for users: \p U qualifies only if it is a
/// VPRecipeBase that the recipe overload of classof() accepts.
587 static inline bool classof(const VPUser *U) {
588 const auto *Recipe = dyn_cast<VPRecipeBase>(U);
589 return Recipe != nullptr && classof(Recipe);
590 }
591
592 VPSingleDefRecipe *clone() override = 0;
593
594 /// Returns the underlying instruction.
601
602#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
603 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
605#endif
606};
607
608/// Class to record and manage LLVM IR flags.
610 enum class OperationType : unsigned char {
611 Cmp,
612 FCmp,
613 OverflowingBinOp,
614 Trunc,
615 DisjointOp,
616 PossiblyExactOp,
617 GEPOp,
618 FPMathOp,
619 NonNegOp,
620 Other
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
649 struct ExactFlagsTy {
650 char IsExact : 1;
651 };
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669
670 OperationType OpType;
671
672 union {
677 ExactFlagsTy ExactFlags;
680 FastMathFlagsTy FMFs;
681 FCmpFlagsTy FCmpFlags;
682 unsigned AllFlags;
683 };
684
685public:
686 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
687
689 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
690 OpType = OperationType::FCmp;
691 FCmpFlags.Pred = FCmp->getPredicate();
692 FCmpFlags.FMFs = FCmp->getFastMathFlags();
693 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
694 OpType = OperationType::Cmp;
695 CmpPredicate = Op->getPredicate();
696 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
697 OpType = OperationType::DisjointOp;
698 DisjointFlags.IsDisjoint = Op->isDisjoint();
699 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
700 OpType = OperationType::OverflowingBinOp;
701 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
702 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
703 OpType = OperationType::Trunc;
704 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
705 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
706 OpType = OperationType::PossiblyExactOp;
707 ExactFlags.IsExact = Op->isExact();
708 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
709 OpType = OperationType::GEPOp;
710 GEPFlags = GEP->getNoWrapFlags();
711 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
712 OpType = OperationType::NonNegOp;
713 NonNegFlags.NonNeg = PNNI->hasNonNeg();
714 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
715 OpType = OperationType::FPMathOp;
716 FMFs = Op->getFastMathFlags();
717 } else {
718 OpType = OperationType::Other;
719 AllFlags = 0;
720 }
721 }
722
724 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
725
727 : OpType(OperationType::FCmp) {
728 FCmpFlags.Pred = Pred;
729 FCmpFlags.FMFs = FMFs;
730 }
731
733 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
734
736 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
737
738 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
739
741 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
742
744 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
745
747 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
748
750 OpType = Other.OpType;
751 AllFlags = Other.AllFlags;
752 }
753
754 /// Only keep flags also present in \p Other. \p Other must have the same
755 /// OpType as the current object.
756 void intersectFlags(const VPIRFlags &Other);
757
758 /// Drop all poison-generating flags.
760 // NOTE: This needs to be kept in-sync with
761 // Instruction::dropPoisonGeneratingFlags.
762 switch (OpType) {
763 case OperationType::OverflowingBinOp:
764 WrapFlags.HasNUW = false;
765 WrapFlags.HasNSW = false;
766 break;
767 case OperationType::Trunc:
768 TruncFlags.HasNUW = false;
769 TruncFlags.HasNSW = false;
770 break;
771 case OperationType::DisjointOp:
772 DisjointFlags.IsDisjoint = false;
773 break;
774 case OperationType::PossiblyExactOp:
775 ExactFlags.IsExact = false;
776 break;
777 case OperationType::GEPOp:
779 break;
780 case OperationType::FPMathOp:
781 case OperationType::FCmp:
782 getFMFsRef().NoNaNs = false;
783 getFMFsRef().NoInfs = false;
784 break;
785 case OperationType::NonNegOp:
786 NonNegFlags.NonNeg = false;
787 break;
788 case OperationType::Cmp:
789 case OperationType::Other:
790 break;
791 }
792 }
793
794 /// Apply the IR flags to \p I.
/// Which flags are written is selected by OpType; every flag of the selected
/// group is set on \p I to the value stored here (true or false). For the
/// DisjointOp and GEPOp cases \p I is cast<> to the matching instruction
/// class, so \p I must be of that class.
795 void applyFlags(Instruction &I) const {
796 switch (OpType) {
797 case OperationType::OverflowingBinOp:
798 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
799 I.setHasNoSignedWrap(WrapFlags.HasNSW);
800 break;
801 case OperationType::Trunc:
802 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
803 I.setHasNoSignedWrap(TruncFlags.HasNSW);
804 break;
805 case OperationType::DisjointOp:
806 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
807 break;
808 case OperationType::PossiblyExactOp:
809 I.setIsExact(ExactFlags.IsExact);
810 break;
811 case OperationType::GEPOp:
812 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
813 break;
814 case OperationType::FPMathOp:
815 case OperationType::FCmp: {
// getFMFsRef() picks the fast-math flag storage matching OpType.
816 const FastMathFlagsTy &F = getFMFsRef();
817 I.setHasAllowReassoc(F.AllowReassoc);
818 I.setHasNoNaNs(F.NoNaNs);
819 I.setHasNoInfs(F.NoInfs);
820 I.setHasNoSignedZeros(F.NoSignedZeros);
821 I.setHasAllowReciprocal(F.AllowReciprocal);
822 I.setHasAllowContract(F.AllowContract);
823 I.setHasApproxFunc(F.ApproxFunc);
824 break;
825 }
826 case OperationType::NonNegOp:
827 I.setNonNeg(NonNegFlags.NonNeg);
828 break;
// Nothing is written to I for these; the compare predicate is not applied
// by this function.
829 case OperationType::Cmp:
830 case OperationType::Other:
831 break;
832 }
833 }
834
836 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
837 "recipe doesn't have a compare predicate");
838 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
839 }
840
842 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
843 "recipe doesn't have a compare predicate");
844 if (OpType == OperationType::FCmp)
845 FCmpFlags.Pred = Pred;
846 else
847 CmpPredicate = Pred;
848 }
849
851
852 /// Returns true if the recipe has a comparison predicate.
853 bool hasPredicate() const {
854 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
855 }
856
857 /// Returns true if the recipe has fast-math flags.
858 bool hasFastMathFlags() const {
859 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
860 }
861
863
864 /// Returns true if the recipe has non-negative flag.
865 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
866
867 bool isNonNeg() const {
868 assert(OpType == OperationType::NonNegOp &&
869 "recipe doesn't have a NNEG flag");
870 return NonNegFlags.NonNeg;
871 }
872
873 bool hasNoUnsignedWrap() const {
874 switch (OpType) {
875 case OperationType::OverflowingBinOp:
876 return WrapFlags.HasNUW;
877 case OperationType::Trunc:
878 return TruncFlags.HasNUW;
879 default:
880 llvm_unreachable("recipe doesn't have a NUW flag");
881 }
882 }
883
884 bool hasNoSignedWrap() const {
885 switch (OpType) {
886 case OperationType::OverflowingBinOp:
887 return WrapFlags.HasNSW;
888 case OperationType::Trunc:
889 return TruncFlags.HasNSW;
890 default:
891 llvm_unreachable("recipe doesn't have a NSW flag");
892 }
893 }
894
/// Returns the stored disjoint flag. Only valid when the flags were created
/// for a DisjointOp operation (asserted).
895 bool isDisjoint() const {
896 assert(OpType == OperationType::DisjointOp &&
897 "recipe cannot have a disjoint flag");
898 return DisjointFlags.IsDisjoint;
899 }
900
901private:
902 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
903 FastMathFlagsTy &getFMFsRef() {
904 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
905 }
906 const FastMathFlagsTy &getFMFsRef() const {
907 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
908 }
909
910public:
911#if !defined(NDEBUG)
912 /// Returns true if the set flags are valid for \p Opcode.
913 bool flagsValidForOpcode(unsigned Opcode) const;
914#endif
915
916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
917 void printFlags(raw_ostream &O) const;
918#endif
919};
920
921/// A pure-virtual common base class for recipes defining a single VPValue and
922/// using IR flags.
924 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
925 const VPIRFlags &Flags,
927 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
928
/// Method to support type inquiry through isa, cast, and dyn_cast.
/// Returns true iff the VPDef ID of \p R is one of the recipe IDs compared
/// against below.
929 static inline bool classof(const VPRecipeBase *R) {
930 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
935 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
936 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
937 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
938 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
939 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
940 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
941 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
942 }
943
/// Type-inquiry overload for users: \p U qualifies only if it is a
/// VPRecipeBase that the recipe overload of classof() accepts.
944 static inline bool classof(const VPUser *U) {
945 const auto *Recipe = dyn_cast<VPRecipeBase>(U);
946 return Recipe != nullptr && classof(Recipe);
947 }
948
/// Type-inquiry overload for values: \p V qualifies only if it has a
/// defining recipe that the recipe overload of classof() accepts.
949 static inline bool classof(const VPValue *V) {
950 const auto *Def = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
951 return Def != nullptr && classof(Def);
952 }
953
954 VPRecipeWithIRFlags *clone() override = 0;
955
/// Type-inquiry overload for single-def recipes: forwards to the
/// VPRecipeBase overload of classof().
956 static inline bool classof(const VPSingleDefRecipe *U) {
957 const auto *Recipe = dyn_cast<VPRecipeBase>(U);
958 return Recipe != nullptr && classof(Recipe);
959 }
960
961 void execute(VPTransformState &State) override = 0;
962
963 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
965 VPCostContext &Ctx) const;
966};
967
968/// Helper to access the operand that contains the unroll part for this recipe
969/// after unrolling.
/// NOTE(review): \p PartOpIdx is presumably the index of the operand that
/// holds the unroll part; confirm against the out-of-line definitions.
970template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
971protected:
972 /// Return the VPValue operand containing the unroll part or null if there is
973 /// no such operand. \p U is the user whose operands are queried.
974 VPValue *getUnrollPartOperand(const VPUser &U) const;
975
976 /// Return the unroll part for the user \p U.
977 unsigned getUnrollPart(const VPUser &U) const;
978};
979
980/// Helper to manage IR metadata for recipes. It filters out metadata that
981/// cannot be propagated.
984
985public:
986 VPIRMetadata() = default;
987
988 /// Adds metadata that can be preserved from the original instruction
989 /// \p I.
991
992 /// Copy constructor for cloning.
993 VPIRMetadata(const VPIRMetadata &Other) = default;
994
996
997 /// Add all metadata to \p I.
998 void applyMetadata(Instruction &I) const;
999
1000 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1001 /// already exists, it will be replaced. Otherwise, it will be added.
/// The lookup is a linear search over the stored (kind, node) pairs.
1002 void setMetadata(unsigned Kind, MDNode *Node) {
// Locate the first stored entry whose kind equals Kind.
1003 auto It =
1004 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1005 return P.first == Kind;
1006 });
// Overwrite the node in place if found; otherwise append a new entry.
1007 if (It != Metadata.end())
1008 It->second = Node;
1009 else
1010 Metadata.emplace_back(Kind, Node);
1011 }
1012
1013 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1014 /// nodes that are common to both.
1015 void intersect(const VPIRMetadata &MD);
1016
1017 /// Get metadata of kind \p Kind. Returns nullptr if not found.
/// The lookup is a linear search; the first entry with a matching kind wins.
1018 MDNode *getMetadata(unsigned Kind) const {
1019 auto It =
1020 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1021 return It != Metadata.end() ? It->second : nullptr;
1022 }
1023};
1024
1025/// This is a concrete Recipe that models a single VPlan-level instruction.
1026/// While as any Recipe it may generate a sequence of IR instructions when
1027/// executed, these instructions would always form a single-def expression as
1028/// the VPInstruction is also a single def-use vertex.
1030 public VPIRMetadata,
1031 public VPUnrollPartAccessor<1> {
1032 friend class VPlanSlp;
1033
1034public:
1035 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1036 enum {
1038 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1039 // values of a first-order recurrence.
1043 // Creates a mask where each lane is active (true) whilst the current
1044 // counter (first operand + index) is less than the second operand. i.e.
1045 // mask[i] = icmp ult (op0 + i), op1
1046 // The size of the mask returned is VF * Multiplier (UF, third op).
1050 // Increment the canonical IV separately for each unrolled part.
1055 /// Given operands of (the same) struct type, creates a struct of fixed-
1056 /// width vectors each containing a struct field of all operands. The
1057 /// number of operands matches the element count of every vector.
1059 /// Creates a fixed-width vector containing all operands. The number of
1060 /// operands matches the vector element count.
1062 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1063 /// abstract VPInstruction whose single defined VPValue represents VF
1064 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1065 /// VPInstructions.
1067 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1068 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1072 // Extracts the last lane from its operand if it is a vector, or the last
1073 // part if scalar. In the latter case, the recipe will be removed during
1074 // unrolling.
1076 // Extracts the last lane for each part from its operand.
1078 // Extracts the second-to-last lane from its operand or the second-to-last
1079 // part if it is scalar. In the latter case, the recipe will be removed
1080 // during unrolling.
1082 LogicalAnd, // Non-poison propagating logical And.
1083 // Add an offset in bytes (second operand) to a base pointer (first
1084 // operand). Only generates scalar values (either for the first lane only or
1085 // for all lanes, depending on its uses).
1087 // Add a vector offset in bytes (second operand) to a scalar base pointer
1088 // (first operand).
1090 // Returns a scalar boolean value, which is true if any lane of its
1091 // (boolean) vector operands is true. It produces the reduced value across
1092 // all unrolled iterations. Unrolling will add all copies of its original
1093 // operand as additional operands. AnyOf is poison-safe as all operands
1094 // will be frozen.
1096 // Calculates the first active lane index of the vector predicate operands.
1097 // It produces the lane index across all unrolled iterations. Unrolling will
1098 // add all copies of its original operand as additional operands.
1100
1101 // The opcodes below are used for VPInstructionWithType.
1102 //
1103 /// Scale the first operand (vector step) by the second operand
1104 /// (scalar-step). Casts both operands to the result type if needed.
1106 /// Start vector for reductions with 3 operands: the original start value,
1107 /// the identity value for the reduction and an integer indicating the
1108 /// scaling factor.
1110 // Creates a step vector starting from 0 to VF with a step of 1.
1112 /// Extracts a single lane (first operand) from a set of vector operands.
1113 /// The lane specifies an index into a vector formed by combining all vector
1114 /// operands (all operands after the first one).
1116 /// Explicit user for the resume phi of the canonical induction in the main
1117 /// VPlan, used by the epilogue vector loop.
1119 /// Returns the value for vscale.
1122 };
1123
1124 /// Returns true if this VPInstruction generates scalar values for all lanes.
1125 /// Most VPInstructions generate a single value per part, either vector or
1126 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1127 /// values per all lanes, stemming from an original ingredient. This method
1128 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1129 /// underlying ingredient.
1130 bool doesGeneratePerAllLanes() const;
1131
1132private:
1133 typedef unsigned char OpcodeTy;
1134 OpcodeTy Opcode;
1135
1136 /// An optional name that can be used for the generated IR instruction.
1137 const std::string Name;
1138
1139 /// Returns true if we can generate a scalar for the first lane only if
1140 /// needed.
1141 bool canGenerateScalarForFirstLane() const;
1142
1143 /// Utility methods serving execute(): generates a single vector instance of
1144 /// the modeled instruction. \returns the generated value. In some cases an
1145 /// existing value is returned rather than a generated one.
1146 Value *generate(VPTransformState &State);
1147
1148#if !defined(NDEBUG)
1149 /// Return the number of operands determined by the opcode of the
1150 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1151 /// directly by the opcode.
1152 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1153#endif
1154
1155public:
1156 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1157 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1158 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1159
1160 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1161
/// Create a new heap-allocated VPInstruction with the same opcode, operands,
/// debug location and name as this one. `*this` is passed twice so that it
/// binds to both the VPIRFlags and the VPIRMetadata constructor parameters.
/// The underlying value is propagated only when one is set.
1162 VPInstruction *clone() override {
1163 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1164 getDebugLoc(), Name);
1165 if (getUnderlyingValue())
1166 New->setUnderlyingValue(getUnderlyingInstr());
1167 return New;
1168 }
1169
1170 unsigned getOpcode() const { return Opcode; }
1171
1172 /// Generate the instruction.
1173 /// TODO: We currently execute only per-part unless a specific instance is
1174 /// provided.
1175 void execute(VPTransformState &State) override;
1176
1177 /// Return the cost of this VPInstruction.
1178 InstructionCost computeCost(ElementCount VF,
1179 VPCostContext &Ctx) const override;
1180
1181#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1182 /// Print the VPInstruction to dbgs() (for debugging).
1183 LLVM_DUMP_METHOD void dump() const;
1184#endif
1185
1186 bool hasResult() const {
1187 // CallInst may or may not have a result, depending on the called function.
1188 // Conservatively assume that calls have results for now.
1189 switch (getOpcode()) {
1190 case Instruction::Ret:
1191 case Instruction::Br:
1192 case Instruction::Store:
1193 case Instruction::Switch:
1194 case Instruction::IndirectBr:
1195 case Instruction::Resume:
1196 case Instruction::CatchRet:
1197 case Instruction::Unreachable:
1198 case Instruction::Fence:
1199 case Instruction::AtomicRMW:
1202 return false;
1203 default:
1204 return true;
1205 }
1206 }
1207
1208 /// Returns true if the underlying opcode may read from or write to memory.
1209 bool opcodeMayReadOrWriteFromMemory() const;
1210
1211 /// Returns true if the recipe only uses the first lane of operand \p Op.
1212 bool usesFirstLaneOnly(const VPValue *Op) const override;
1213
1214 /// Returns true if the recipe only uses the first part of operand \p Op.
1215 bool usesFirstPartOnly(const VPValue *Op) const override;
1216
1217 /// Returns true if this VPInstruction produces a scalar value from a vector,
1218 /// e.g. by performing a reduction or extracting a lane.
1219 bool isVectorToScalar() const;
1220
1221 /// Returns true if this VPInstruction's operands are single scalars and the
1222 /// result is also a single scalar.
1223 bool isSingleScalar() const;
1224
1225 /// Returns the symbolic name assigned to the VPInstruction.
1226 StringRef getName() const { return Name; }
1227
1228protected:
1229#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1230 /// Print the VPInstruction to \p O.
1231 void printRecipe(raw_ostream &O, const Twine &Indent,
1232 VPSlotTracker &SlotTracker) const override;
1233#endif
1234};
1235
1236/// A specialization of VPInstruction augmenting it with a dedicated result
1237/// type, to be used when the opcode and operands of the VPInstruction don't
1238/// directly determine the result type. Note that there is no separate VPDef ID
1239/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1240/// distinguished purely by the opcode.
1242 /// Scalar result type produced by the recipe.
1243 Type *ResultTy;
1244
1245public:
1247 Type *ResultTy, const VPIRFlags &Flags = {},
1248 const VPIRMetadata &Metadata = {},
1250 const Twine &Name = "")
1251 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1252 ResultTy(ResultTy) {}
1253
1254 static inline bool classof(const VPRecipeBase *R) {
1255 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1256 // type information.
1257 if (R->isScalarCast())
1258 return true;
1259 auto *VPI = dyn_cast<VPInstruction>(R);
1260 if (!VPI)
1261 return false;
1262 switch (VPI->getOpcode()) {
1266 return true;
1267 default:
1268 return false;
1269 }
1270 }
1271
1272 static inline bool classof(const VPUser *R) {
1274 }
1275
1276 VPInstruction *clone() override {
1277 auto *New =
1279 *this, *this, getDebugLoc(), getName());
1280 New->setUnderlyingValue(getUnderlyingValue());
1281 return New;
1282 }
1283
1284 void execute(VPTransformState &State) override;
1285
1286 /// Return the cost of this VPInstruction.
1288 VPCostContext &Ctx) const override {
1289 // TODO: Compute accurate cost after retiring the legacy cost model.
1290 return 0;
1291 }
1292
1293 Type *getResultType() const { return ResultTy; }
1294
1295protected:
1296#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1297 /// Print the recipe.
1298 void printRecipe(raw_ostream &O, const Twine &Indent,
1299 VPSlotTracker &SlotTracker) const override;
1300#endif
1301};
1302
1303/// Helper type to provide functions to access incoming values and blocks for
1304/// phi-like recipes.
1306protected:
1307 /// Return a VPRecipeBase* to the current object.
1308 virtual const VPRecipeBase *getAsRecipe() const = 0;
1309
1310public:
1311 virtual ~VPPhiAccessors() = default;
1312
1313 /// Returns the incoming VPValue with index \p Idx.
/// Incoming values are the operands of the recipe returned by getAsRecipe(),
/// so \p Idx is used directly as an operand index.
1314 VPValue *getIncomingValue(unsigned Idx) const {
1315 return getAsRecipe()->getOperand(Idx);
1316 }
1317
1318 /// Returns the incoming block with index \p Idx.
1319 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1320
1321 /// Returns the number of incoming values, also number of incoming blocks.
1322 virtual unsigned getNumIncoming() const {
1323 return getAsRecipe()->getNumOperands();
1324 }
1325
1326 /// Returns an iterator range over the incoming values.
1328 return make_range(getAsRecipe()->op_begin(),
1329 getAsRecipe()->op_begin() + getNumIncoming());
1330 }
1331
1333 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1334
1335 /// Returns an iterator range over the incoming blocks.
1337 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1338 return getIncomingBlock(Idx);
1339 };
1340 return map_range(index_range(0, getNumIncoming()), GetBlock);
1341 }
1342
1343 /// Returns an iterator range over pairs of incoming values and corresponding
1344 /// incoming blocks.
1350
1351 /// Removes the incoming value for \p IncomingBlock, which must be a
1352 /// predecessor.
1353 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1354
1355#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1356 /// Print the recipe.
1358#endif
1359};
1360
1362 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1363 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1364
/// RTTI hook for VPUser: returns true if \p U is a VPInstruction whose opcode
/// is Instruction::PHI.
1365 static inline bool classof(const VPUser *U) {
1366 auto *VPI = dyn_cast<VPInstruction>(U);
1367 return VPI && VPI->getOpcode() == Instruction::PHI;
1368 }
1369
/// RTTI hook for VPValue: returns true if \p V is a VPInstruction whose
/// opcode is Instruction::PHI.
1370 static inline bool classof(const VPValue *V) {
1371 auto *VPI = dyn_cast<VPInstruction>(V);
1372 return VPI && VPI->getOpcode() == Instruction::PHI;
1373 }
1374
/// RTTI hook for VPSingleDefRecipe: returns true if \p SDR is a VPInstruction
/// whose opcode is Instruction::PHI.
1375 static inline bool classof(const VPSingleDefRecipe *SDR) {
1376 auto *VPI = dyn_cast<VPInstruction>(SDR);
1377 return VPI && VPI->getOpcode() == Instruction::PHI;
1378 }
1379
/// Create a new heap-allocated VPPhi with the same operands, debug location
/// and name, and copy over the underlying value. The VPPhi constructor always
/// passes empty flags and metadata, so neither is carried over to the clone.
1380 VPPhi *clone() override {
1381 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1382 PhiR->setUnderlyingValue(getUnderlyingValue());
1383 return PhiR;
1384 }
1385
1386 void execute(VPTransformState &State) override;
1387
1388protected:
1389#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1390 /// Print the recipe.
1391 void printRecipe(raw_ostream &O, const Twine &Indent,
1392 VPSlotTracker &SlotTracker) const override;
1393#endif
1394
1395 const VPRecipeBase *getAsRecipe() const override { return this; }
1396};
1397
1398/// A recipe to wrap an original IR instruction not to be modified during
1399/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1400/// Except for PHIs, VPIRInstructions cannot have any operands.
1402 Instruction &I;
1403
1404protected:
1405 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1406 /// subclasses may need to be created, e.g. VPIRPhi.
1408 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1409
1410public:
1411 ~VPIRInstruction() override = default;
1412
1413 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1414 /// VPIRInstruction.
1416
1417 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1418
1420 auto *R = create(I);
1421 for (auto *Op : operands())
1422 R->addOperand(Op);
1423 return R;
1424 }
1425
1426 void execute(VPTransformState &State) override;
1427
1428 /// Return the cost of this VPIRInstruction.
1430 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1431
1432 Instruction &getInstruction() const { return I; }
1433
1434 bool usesScalars(const VPValue *Op) const override {
1436 "Op must be an operand of the recipe");
1437 return true;
1438 }
1439
1440 bool usesFirstPartOnly(const VPValue *Op) const override {
1442 "Op must be an operand of the recipe");
1443 return true;
1444 }
1445
1446 bool usesFirstLaneOnly(const VPValue *Op) const override {
1448 "Op must be an operand of the recipe");
1449 return true;
1450 }
1451
1452 /// Update the recipe's first operand to the last lane of the operand using \p
1453 /// Builder. Must only be used for VPIRInstructions with at least one operand
1454 /// wrapping a PHINode.
1456
1457protected:
1458#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1459 /// Print the recipe.
1460 void printRecipe(raw_ostream &O, const Twine &Indent,
1461 VPSlotTracker &SlotTracker) const override;
1462#endif
1463};
1464
1465/// An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient
1466/// use of cast/dyn_cast/isa and an execute() implementation. A single VPValue
1467/// operand is allowed, and it is used to add a new incoming value for the
1468/// single predecessor VPBB.
1470 public VPPhiAccessors {
1472
/// RTTI hook: returns true if \p U is a VPIRInstruction whose wrapped IR
/// instruction is a PHINode.
1473 static inline bool classof(const VPRecipeBase *U) {
1474 auto *R = dyn_cast<VPIRInstruction>(U);
1475 return R && isa<PHINode>(R->getInstruction());
1476 }
1477
1479
1480 void execute(VPTransformState &State) override;
1481
1482protected:
1483#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1484 /// Print the recipe.
1485 void printRecipe(raw_ostream &O, const Twine &Indent,
1486 VPSlotTracker &SlotTracker) const override;
1487#endif
1488
1489 const VPRecipeBase *getAsRecipe() const override { return this; }
1490};
1491
1492/// VPWidenRecipe is a recipe for producing a widened instruction using the
1493/// opcode and operands of the recipe. This recipe covers most of the
1494/// traditional vectorization cases where each recipe transforms into a
1495/// vectorized version of itself.
1497 public VPIRMetadata {
1498 unsigned Opcode;
1499
1500public:
1501 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1502 const VPIRFlags &Flags, const VPIRMetadata &Metadata,
1503 DebugLoc DL)
1504 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1505 VPIRMetadata(Metadata), Opcode(Opcode) {}
1506
1508 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1509 DebugLoc DL = {})
1510 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1511 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1512 setUnderlyingValue(&I);
1513 }
1514
1515 ~VPWidenRecipe() override = default;
1516
/// Create a new heap-allocated VPWidenRecipe with the same opcode, operands
/// and debug location. `*this` is passed twice so that it binds to both the
/// VPIRFlags and the VPIRMetadata constructor parameters. The underlying
/// value is copied over unconditionally.
1517 VPWidenRecipe *clone() override {
1518 auto *R =
1519 new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
1520 R->setUnderlyingValue(getUnderlyingValue());
1521 return R;
1522 }
1523
1524 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1525
1526 /// Produce a widened instruction using the opcode and operands of the recipe,
1527 /// processing State.VF elements.
1528 void execute(VPTransformState &State) override;
1529
1530 /// Return the cost of this VPWidenRecipe.
1531 InstructionCost computeCost(ElementCount VF,
1532 VPCostContext &Ctx) const override;
1533
1534 unsigned getOpcode() const { return Opcode; }
1535
1536protected:
1537#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1538 /// Print the recipe.
1539 void printRecipe(raw_ostream &O, const Twine &Indent,
1540 VPSlotTracker &SlotTracker) const override;
1541#endif
1542};
1543
1544/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1546 /// Cast instruction opcode.
1547 Instruction::CastOps Opcode;
1548
1549 /// Result type for the cast.
1550 Type *ResultTy;
1551
1552public:
1554 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1555 const VPIRMetadata &Metadata = {},
1557 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1558 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1559 assert(flagsValidForOpcode(Opcode) &&
1560 "Set flags not supported for the provided opcode");
1562 }
1563
1564 ~VPWidenCastRecipe() override = default;
1565
1567 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1569 *this, *this, getDebugLoc());
1570 }
1571
1572 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1573
1574 /// Produce widened copies of the cast.
1575 void execute(VPTransformState &State) override;
1576
1577 /// Return the cost of this VPWidenCastRecipe.
1579 VPCostContext &Ctx) const override;
1580
1581 Instruction::CastOps getOpcode() const { return Opcode; }
1582
1583 /// Returns the result type of the cast.
1584 Type *getResultType() const { return ResultTy; }
1585
1586protected:
1587#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1588 /// Print the recipe.
1589 void printRecipe(raw_ostream &O, const Twine &Indent,
1590 VPSlotTracker &SlotTracker) const override;
1591#endif
1592};
1593
1594/// A recipe for widening vector intrinsics.
1596 /// ID of the vector intrinsic to widen.
1597 Intrinsic::ID VectorIntrinsicID;
1598
1599 /// Scalar return type of the intrinsic.
1600 Type *ResultTy;
1601
1602 /// True if the intrinsic may read from memory.
1603 bool MayReadFromMemory;
1604
1605 /// True if the intrinsic may write to memory.
1606 bool MayWriteToMemory;
1607
1608 /// True if the intrinsic may have side-effects.
1609 bool MayHaveSideEffects;
1610
1611public:
1613 ArrayRef<VPValue *> CallArguments, Type *Ty,
1614 const VPIRFlags &Flags = {},
1615 const VPIRMetadata &MD = {},
1617 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1618 DL),
1619 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1620 MayReadFromMemory(CI.mayReadFromMemory()),
1621 MayWriteToMemory(CI.mayWriteToMemory()),
1622 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1623 setUnderlyingValue(&CI);
1624 }
1625
1627 ArrayRef<VPValue *> CallArguments, Type *Ty,
1628 const VPIRFlags &Flags = {},
1629 const VPIRMetadata &Metadata = {},
1631 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1632 DL),
1633 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1634 ResultTy(Ty) {
1635 LLVMContext &Ctx = Ty->getContext();
1636 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1637 MemoryEffects ME = Attrs.getMemoryEffects();
1638 MayReadFromMemory = !ME.onlyWritesMemory();
1639 MayWriteToMemory = !ME.onlyReadsMemory();
1640 MayHaveSideEffects = MayWriteToMemory ||
1641 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1642 !Attrs.hasAttribute(Attribute::WillReturn);
1643 }
1644
1645 ~VPWidenIntrinsicRecipe() override = default;
1646
1648 if (Value *CI = getUnderlyingValue())
1649 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1650 operands(), ResultTy, *this, *this,
1651 getDebugLoc());
1652 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1653 *this, *this, getDebugLoc());
1654 }
1655
1656 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1657
1658 /// Produce a widened version of the vector intrinsic.
1659 void execute(VPTransformState &State) override;
1660
1661 /// Return the cost of this vector intrinsic.
1663 VPCostContext &Ctx) const override;
1664
1665 /// Return the ID of the intrinsic.
1666 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1667
1668 /// Return the scalar return type of the intrinsic.
1669 Type *getResultType() const { return ResultTy; }
1670
1671 /// Return the name of the intrinsic as string.
1673
1674 /// Returns true if the intrinsic may read from memory.
1675 bool mayReadFromMemory() const { return MayReadFromMemory; }
1676
1677 /// Returns true if the intrinsic may write to memory.
1678 bool mayWriteToMemory() const { return MayWriteToMemory; }
1679
1680 /// Returns true if the intrinsic may have side-effects.
1681 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1682
1683 bool usesFirstLaneOnly(const VPValue *Op) const override;
1684
1685protected:
1686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1687 /// Print the recipe.
1688 void printRecipe(raw_ostream &O, const Twine &Indent,
1689 VPSlotTracker &SlotTracker) const override;
1690#endif
1691};
1692
1693/// A recipe for widening Call instructions using library calls.
1695 public VPIRMetadata {
1696 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1697 /// between a given VF and the chosen vectorized variant, so there will be a
1698 /// different VPlan for each VF with a valid variant.
1699 Function *Variant;
1700
1701public:
1703 ArrayRef<VPValue *> CallArguments,
1704 const VPIRFlags &Flags = {},
1705 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1706 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1707 VPIRMetadata(Metadata), Variant(Variant) {
1708 setUnderlyingValue(UV);
1709 assert(
1710 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1711 "last operand must be the called function");
1712 }
1713
1714 ~VPWidenCallRecipe() override = default;
1715
1717 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1718 *this, *this, getDebugLoc());
1719 }
1720
1721 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1722
1723 /// Produce a widened version of the call instruction.
1724 void execute(VPTransformState &State) override;
1725
1726 /// Return the cost of this VPWidenCallRecipe.
1727 InstructionCost computeCost(ElementCount VF,
1728 VPCostContext &Ctx) const override;
1729
1733
1736
1737protected:
1738#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1739 /// Print the recipe.
1740 void printRecipe(raw_ostream &O, const Twine &Indent,
1741 VPSlotTracker &SlotTracker) const override;
1742#endif
1743};
1744
1745/// A recipe representing a sequence of load -> update -> store as part of
1746/// a histogram operation. This means there may be aliasing between vector
1747/// lanes, which is handled by the llvm.experimental.vector.histogram family
1748/// of intrinsics. The only update operations currently supported are
1749/// 'add' and 'sub' where the other term is loop-invariant.
1751 /// Opcode of the update operation, currently either add or sub.
1752 unsigned Opcode;
1753
1754public:
1755 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1757 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1758
1759 ~VPHistogramRecipe() override = default;
1760
1762 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1763 }
1764
1765 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1766
1767 /// Produce a vectorized histogram operation.
1768 void execute(VPTransformState &State) override;
1769
1770 /// Return the cost of this VPHistogramRecipe.
1772 VPCostContext &Ctx) const override;
1773
1774 unsigned getOpcode() const { return Opcode; }
1775
1776 /// Return the mask operand if one was provided, or a null pointer if all
1777 /// lanes should be executed unconditionally.
/// The mask, when present, is the third operand (index 2); its presence is
/// detected purely from the operand count being exactly 3.
1778 VPValue *getMask() const {
1779 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1780 }
1781
1782protected:
1783#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1784 /// Print the recipe
1785 void printRecipe(raw_ostream &O, const Twine &Indent,
1786 VPSlotTracker &SlotTracker) const override;
1787#endif
1788};
1789
1790/// A recipe for widening select instructions. Supports both wide vector and
1791/// single-scalar conditions, matching the behavior of LLVM IR's select
1792/// instruction.
1794 public VPIRMetadata {
1796 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1797 DebugLoc DL = {})
1798 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
1799 VPIRMetadata(MD) {
1800 setUnderlyingValue(SI);
1801 }
1802
1803 ~VPWidenSelectRecipe() override = default;
1804
1807 operands(), *this, *this, getDebugLoc());
1808 }
1809
1810 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1811
1812 /// Produce a widened version of the select instruction.
1813 void execute(VPTransformState &State) override;
1814
1815 /// Return the cost of this VPWidenSelectRecipe.
1816 InstructionCost computeCost(ElementCount VF,
1817 VPCostContext &Ctx) const override;
1818
1819 unsigned getOpcode() const { return Instruction::Select; }
1820
/// Returns the condition of the select, which is stored as operand 0.
1821 VPValue *getCond() const {
1822 return getOperand(0);
1823 }
1824
1825 /// Returns true if the recipe only uses the first lane of operand \p Op.
1826 bool usesFirstLaneOnly(const VPValue *Op) const override {
1828 "Op must be an operand of the recipe");
1829 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1830 }
1831
1832protected:
1833#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1834 /// Print the recipe.
1835 void printRecipe(raw_ostream &O, const Twine &Indent,
1836 VPSlotTracker &SlotTracker) const override;
1837#endif
1838};
1839
1840/// A recipe for handling GEP instructions.
1842 Type *SourceElementTy;
1843
/// Returns true if operand 0 (the pointer operand, going by this method's
/// name) is defined outside of loop regions.
1844 bool isPointerLoopInvariant() const {
1845 return getOperand(0)->isDefinedOutsideLoopRegions();
1846 }
1847
/// Returns true if the index with number \p I is defined outside of loop
/// regions. Indices are offset by one in the operand list, i.e. index \p I
/// is operand I + 1 (operand 0 is skipped).
1848 bool isIndexLoopInvariant(unsigned I) const {
1849 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1850 }
1851
/// Returns true if every operand of this recipe is defined outside of loop
/// regions.
1852 bool areAllOperandsInvariant() const {
1853 return all_of(operands(), [](VPValue *Op) {
1854 return Op->isDefinedOutsideLoopRegions();
1855 });
1856 }
1857
1858public:
1860 const VPIRFlags &Flags = {},
1862 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1863 SourceElementTy(GEP->getSourceElementType()) {
1864 setUnderlyingValue(GEP);
1866 (void)Metadata;
1868 assert(Metadata.empty() && "unexpected metadata on GEP");
1869 }
1870
1871 ~VPWidenGEPRecipe() override = default;
1872
1875 operands(), *this, getDebugLoc());
1876 }
1877
1878 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1879
1880 /// This recipe generates a GEP instruction.
1881 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1882
1883 /// Generate the gep nodes.
1884 void execute(VPTransformState &State) override;
1885
1886 Type *getSourceElementType() const { return SourceElementTy; }
1887
1888 /// Return the cost of this VPWidenGEPRecipe.
1890 VPCostContext &Ctx) const override {
1891 // TODO: Compute accurate cost after retiring the legacy cost model.
1892 return 0;
1893 }
1894
1895 /// Returns true if the recipe only uses the first lane of operand \p Op.
1896 bool usesFirstLaneOnly(const VPValue *Op) const override {
1898 "Op must be an operand of the recipe");
1899 if (Op == getOperand(0))
1900 return isPointerLoopInvariant();
1901 else
1902 return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1903 }
1904
1905protected:
1906#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1907 /// Print the recipe.
1908 void printRecipe(raw_ostream &O, const Twine &Indent,
1909 VPSlotTracker &SlotTracker) const override;
1910#endif
1911};
1912
1913/// A recipe to compute a pointer to the last element of each part of a widened
1914/// memory access for widened memory accesses of IndexedTy. Used for
1915/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1917 public VPUnrollPartAccessor<2> {
1918 Type *IndexedTy;
1919
1920 /// The constant stride of the pointer computed by this recipe, expressed in
1921 /// units of IndexedTy.
1922 int64_t Stride;
1923
1924public:
1926 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1927 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1928 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1929 IndexedTy(IndexedTy), Stride(Stride) {
1930 assert(Stride < 0 && "Stride must be negative");
1931 }
1932
1933 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1934
1936 const VPValue *getVFValue() const { return getOperand(1); }
1937
1938 void execute(VPTransformState &State) override;
1939
1940 bool usesFirstLaneOnly(const VPValue *Op) const override {
1942 "Op must be an operand of the recipe");
1943 return true;
1944 }
1945
1946 /// Return the cost of this VPVectorEndPointerRecipe.
1948 VPCostContext &Ctx) const override {
1949 // TODO: Compute accurate cost after retiring the legacy cost model.
1950 return 0;
1951 }
1952
1953 /// Returns true if the recipe only uses the first part of operand \p Op.
1954 bool usesFirstPartOnly(const VPValue *Op) const override {
1956 "Op must be an operand of the recipe");
1957 assert(getNumOperands() <= 2 && "must have at most two operands");
1958 return true;
1959 }
1960
1962 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1963 Stride, getGEPNoWrapFlags(),
1964 getDebugLoc());
1965 }
1966
1967protected:
1968#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1969 /// Print the recipe.
1970 void printRecipe(raw_ostream &O, const Twine &Indent,
1971 VPSlotTracker &SlotTracker) const override;
1972#endif
1973};
1974
1975/// A recipe to compute the pointers for widened memory accesses of
/// SourceElementTy.
1977 public VPUnrollPartAccessor<1> {
1978 Type *SourceElementTy;
1979
1980public:
1983 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1984 GEPFlags, DL),
1985 SourceElementTy(SourceElementTy) {}
1986
1987 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1988
1989 void execute(VPTransformState &State) override;
1990
1991 Type *getSourceElementType() const { return SourceElementTy; }
1992
1993 bool usesFirstLaneOnly(const VPValue *Op) const override {
1995 "Op must be an operand of the recipe");
1996 return true;
1997 }
1998
1999 /// Returns true if the recipe only uses the first part of operand \p Op.
2000 bool usesFirstPartOnly(const VPValue *Op) const override {
2002 "Op must be an operand of the recipe");
2003 assert(getNumOperands() <= 2 && "must have at most two operands");
2004 return true;
2005 }
2006
2008 return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2010 }
2011
2012 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
2013 /// this is only accurate after the VPlan has been unrolled.
2014 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
2015
2016 /// Return the cost of this VPHeaderPHIRecipe.
2018 VPCostContext &Ctx) const override {
2019 // TODO: Compute accurate cost after retiring the legacy cost model.
2020 return 0;
2021 }
2022
2023protected:
2024#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2025 /// Print the recipe.
2026 void printRecipe(raw_ostream &O, const Twine &Indent,
2027 VPSlotTracker &SlotTracker) const override;
2028#endif
2029};
2030
2031/// A pure virtual base class for all recipes modeling header phis, including
2032/// phis for first order recurrences, pointer inductions and reductions. The
2033/// start value is the first operand of the recipe and the incoming value from
2034/// the backedge is the second operand.
2035///
2036/// Inductions are modeled using the following sub-classes:
2037/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2038/// starting at a specified value (zero for the main vector loop, the resume
2039/// value for the epilogue vector loop) and stepping by 1. The induction
2040/// controls exiting of the vector loop by comparing against the vector trip
2041/// count. Produces a single scalar PHI for the induction value per
2042/// iteration.
2043/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2044/// floating point inductions with arbitrary start and step values. Produces
2045/// a vector PHI per-part.
2046/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2047/// value of an IV with different start and step values. Produces a single
2048/// scalar value per iteration.
2049/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2050/// canonical or derived induction.
2051/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2052/// pointer induction. Produces either a vector PHI per-part or scalar values
2053/// per-lane based on the canonical induction.
2055 public VPPhiAccessors {
2056protected:
 /// Create a header phi recipe whose only initial operand is \p Start. Any
 /// further operands are added separately.
2057 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2058 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2059 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2060 UnderlyingInstr, DL) {}
2061
 /// Returns this recipe itself.
2062 const VPRecipeBase *getAsRecipe() const override { return this; }
2063
2064public:
2065 ~VPHeaderPHIRecipe() override = default;
2066
2067 /// Method to support type inquiry through isa, cast, and dyn_cast.
 /// A recipe is a header phi if its ID lies in the inclusive range
 /// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
2068 static inline bool classof(const VPRecipeBase *B) {
2069 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2070 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2071 }
 /// Same check as above, applied to the defining recipe of \p V. Returns false
 /// if \p V has no defining recipe.
2072 static inline bool classof(const VPValue *V) {
2073 auto *B = V->getDefiningRecipe();
2074 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2075 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2076 }
2077
2078 /// Generate the phi nodes.
2079 void execute(VPTransformState &State) override = 0;
2080
2081 /// Return the cost of this header phi recipe.
2083 VPCostContext &Ctx) const override;
2084
2085 /// Returns the start value of the phi, if one is set. The start value is
 /// operand 0; nullptr is returned when the recipe has no operands.
2087 return getNumOperands() == 0 ? nullptr : getOperand(0);
2088 }
2090 return getNumOperands() == 0 ? nullptr : getOperand(0);
2091 }
2092
2093 /// Update the start value of the recipe.
2095
2096 /// Returns the incoming value from the loop backedge, i.e. operand 1.
2098 return getOperand(1);
2099 }
2100
2101 /// Update the incoming value from the loop backedge.
2103
2104 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2105 /// to be a recipe.
2107 return *getBackedgeValue()->getDefiningRecipe();
2108 }
2109
2110protected:
2111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2112 /// Print the recipe.
2113 void printRecipe(raw_ostream &O, const Twine &Indent,
2114 VPSlotTracker &SlotTracker) const override = 0;
2115#endif
2116};
2117
2118/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2119/// VPWidenPointerInductionRecipe), providing shared functionality, including
2120/// retrieving the step value, induction descriptor and original phi node.
/// The start value is operand 0 and the step value is operand 1.
 /// The induction descriptor, held by reference.
2122 const InductionDescriptor &IndDesc;
2123
2124public:
 /// Create a widened induction recipe. \p Start is passed to the header phi
 /// base class and \p Step is appended as the next operand.
2125 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2126 VPValue *Step, const InductionDescriptor &IndDesc,
2127 DebugLoc DL)
2128 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2129 addOperand(Step);
2130 }
2131
 /// Method to support type inquiry through isa, cast, and dyn_cast. Matches
 /// the integer/floating-point and the pointer widened induction recipes.
2132 static inline bool classof(const VPRecipeBase *R) {
2133 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2134 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2135 }
2136
 /// Returns false if \p V has no defining recipe.
2137 static inline bool classof(const VPValue *V) {
2138 auto *R = V->getDefiningRecipe();
2139 return R && classof(R);
2140 }
2141
2142 static inline bool classof(const VPHeaderPHIRecipe *R) {
2143 return classof(static_cast<const VPRecipeBase *>(R));
2144 }
2145
2146 void execute(VPTransformState &State) override = 0;
2147
2148 /// Returns the step value of the induction.
2150 const VPValue *getStepValue() const { return getOperand(1); }
2151
2152 /// Update the step value of the recipe.
2153 void setStepValue(VPValue *V) { setOperand(1, V); }
2154
 /// Returns operand 2 of the recipe.
2156 const VPValue *getVFValue() const { return getOperand(2); }
2157
2158 /// Returns the number of incoming values, also number of incoming blocks.
2159 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2160 /// incoming value, its start value.
2161 unsigned getNumIncoming() const override { return 1; }
2162
2164
2165 /// Returns the induction descriptor for the recipe.
2166 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2167
2169 // TODO: All operands of base recipe must exist and be at same index in
2170 // derived recipe.
2172 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2173 }
2174
2176 // TODO: All operands of base recipe must exist and be at same index in
2177 // derived recipe.
2179 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2180 }
2181
2182 /// Returns true if the recipe only uses the first lane of operand \p Op.
2183 bool usesFirstLaneOnly(const VPValue *Op) const override {
2185 "Op must be an operand of the recipe");
2186 // The recipe creates its own wide start value, so it only requests the
2187 // first lane of the operand.
2188 // TODO: Remove once creating the start value is modeled separately.
2189 return Op == getStartValue() || Op == getStepValue();
2190 }
2191};
2192
2193/// A recipe for handling phi nodes of integer and floating-point inductions,
2194/// producing their vector values. This is an abstract recipe and must be
2195/// converted to concrete recipes before executing.
2197 public VPIRFlags {
 /// Optional truncate instruction; nullptr if the recipe was created without
 /// one.
2198 TruncInst *Trunc;
2199
2200 // If this recipe is unrolled it will have 2 additional operands.
 // The constructors set up 3 operands (start, step, VF), hence 5 in total.
2201 bool isUnrolled() const { return getNumOperands() == 5; }
2202
2203public:
2205 VPValue *VF, const InductionDescriptor &IndDesc,
2206 const VPIRFlags &Flags, DebugLoc DL)
2207 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2208 Step, IndDesc, DL),
2209 VPIRFlags(Flags), Trunc(nullptr) {
 // VF follows the start and step values as the third operand.
2210 addOperand(VF);
2211 }
2212
2214 VPValue *VF, const InductionDescriptor &IndDesc,
2215 TruncInst *Trunc, const VPIRFlags &Flags,
2216 DebugLoc DL)
2217 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2218 Step, IndDesc, DL),
2219 VPIRFlags(Flags), Trunc(Trunc) {
 // VF follows the start and step values as the third operand.
2220 addOperand(VF);
2222 (void)Metadata;
2223 if (Trunc)
2225 assert(Metadata.empty() && "unexpected metadata on Trunc");
2226 }
2227
2229
2235
2236 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2237
 /// This recipe is abstract and must not be executed; reaching this is a bug.
2238 void execute(VPTransformState &State) override {
2239 llvm_unreachable("cannot execute this recipe, should be expanded via "
2240 "expandVPWidenIntOrFpInductionRecipe");
2241 }
2242
2244 // If the recipe has been unrolled return the VPValue for the induction
2245 // increment.
2246 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2247 }
2248
2249 /// Returns the number of incoming values, also number of incoming blocks.
2250 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2251 /// incoming value, its start value.
2252 unsigned getNumIncoming() const override { return 1; }
2253
2254 /// Returns the first defined value as TruncInst, if it is one or nullptr
2255 /// otherwise.
2256 TruncInst *getTruncInst() { return Trunc; }
2257 const TruncInst *getTruncInst() const { return Trunc; }
2258
2259 /// Returns true if the induction is canonical, i.e. starting at 0 and
2260 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2261 /// same type as the canonical induction.
2262 bool isCanonical() const;
2263
2264 /// Returns the scalar type of the induction. If a truncate instruction is
 /// set, its type is used.
2266 return Trunc ? Trunc->getType()
2268 }
2269
2270 /// Returns the VPValue representing the value of this induction at
2271 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2272 /// take place.
2274 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2275 }
2276
2277protected:
2278#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2279 /// Print the recipe.
2280 void printRecipe(raw_ostream &O, const Twine &Indent,
2281 VPSlotTracker &SlotTracker) const override;
2282#endif
2283};
2284
 /// Flag passed to the constructor and forwarded when the recipe is copied.
 /// NOTE(review): presumably consulted by onlyScalarsGenerated() -- confirm in
 /// the implementation file.
2286 bool IsScalarAfterVectorization;
2287
2288public:
2289 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2290 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2291 /// VF*UF.
2293 VPValue *NumUnrolledElems,
2294 const InductionDescriptor &IndDesc,
2295 bool IsScalarAfterVectorization, DebugLoc DL)
2296 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2297 Step, IndDesc, DL),
2298 IsScalarAfterVectorization(IsScalarAfterVectorization) {
 // NumUnrolledElems follows the start and step values as the third operand.
2299 addOperand(NumUnrolledElems);
2300 }
2301
2303
2307 getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
2308 getDebugLoc());
2309 }
2310
2311 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2312
2313 /// Generate vector values for the pointer induction.
 /// This recipe must be expanded before execution; reaching this is a bug.
2314 void execute(VPTransformState &State) override {
2315 llvm_unreachable("cannot execute this recipe, should be expanded via "
2316 "expandVPWidenPointerInduction");
2317 };
2318
2319 /// Returns true if only scalar values will be generated.
2320 bool onlyScalarsGenerated(bool IsScalable);
2321
2322protected:
2323#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2324 /// Print the recipe.
2325 void printRecipe(raw_ostream &O, const Twine &Indent,
2326 VPSlotTracker &SlotTracker) const override;
2327#endif
2328};
2329
2330/// A recipe for widened phis. Incoming values are operands of the recipe and
2331/// their operand index corresponds to the incoming predecessor block. If the
2332/// recipe is placed in an entry block to a (non-replicate) region, it must have
2333/// exactly 2 incoming values, the first from the predecessor of the region and
2334/// the second from the exiting block of the region.
2336 public VPPhiAccessors {
2337 /// Name to use for the generated IR instruction for the widened phi.
2338 std::string Name;
2339
2340public:
2341 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start,
2342 /// debug location \p DL and name \p Name. The recipe starts without operands;
 /// \p Start is only added as an operand if it is non-null.
2343 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2344 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2345 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2346 if (Start)
2347 addOperand(Start);
2348 }
2349
2352 getOperand(0), getDebugLoc(), Name);
2354 C->addOperand(Op);
2355 return C;
2356 }
2357
2358 ~VPWidenPHIRecipe() override = default;
2359
2360 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2361
2362 /// Generate the phi/select nodes.
2363 void execute(VPTransformState &State) override;
2364
2365protected:
2366#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2367 /// Print the recipe.
2368 void printRecipe(raw_ostream &O, const Twine &Indent,
2369 VPSlotTracker &SlotTracker) const override;
2370#endif
2371
 /// Returns this recipe itself.
2372 const VPRecipeBase *getAsRecipe() const override { return this; }
2373};
2374
2375/// A recipe for handling first-order recurrence phis. The start value is the
2376/// first operand of the recipe and the incoming value from the backedge is the
2377/// second operand.
2380 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2381
2382 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2383
2388
2389 void execute(VPTransformState &State) override;
2390
2391 /// Return the cost of this first-order recurrence phi recipe.
2393 VPCostContext &Ctx) const override;
2394
2395 /// Returns true if the recipe only uses the first lane of operand \p Op.
 /// This holds for the start value only.
2396 bool usesFirstLaneOnly(const VPValue *Op) const override {
2398 "Op must be an operand of the recipe");
2399 return Op == getStartValue();
2400 }
2401
2402protected:
2403#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2404 /// Print the recipe.
2405 void printRecipe(raw_ostream &O, const Twine &Indent,
2406 VPSlotTracker &SlotTracker) const override;
2407#endif
2408};
2409
2410/// A recipe for handling reduction phis. The start value is the first operand
2411/// of the recipe and the incoming value from the backedge is the second
2412/// operand.
2414 public VPUnrollPartAccessor<2> {
2415 /// The recurrence kind of the reduction.
2416 const RecurKind Kind;
2417
2418 /// The phi is part of an in-loop reduction.
2419 bool IsInLoop;
2420
2421 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2422 bool IsOrdered;
2423
2424 /// When expanding the reduction PHI, the plan's VF element count is divided
2425 /// by this factor to form the reduction phi's VF.
2426 unsigned VFScaleFactor = 1;
2427
2428public:
2429 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2431 bool IsInLoop = false, bool IsOrdered = false,
2432 unsigned VFScaleFactor = 1)
2433 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2434 IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
 // An ordered reduction must also be an in-loop reduction.
2435 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2436 }
2437
2438 ~VPReductionPHIRecipe() override = default;
2439
2441 auto *R = new VPReductionPHIRecipe(
2443 *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
 // The constructor only adds the start value; copy the backedge value too.
2444 R->addOperand(getBackedgeValue());
2445 return R;
2446 }
2447
2448 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2449
2450 /// Generate the phi/select nodes.
2451 void execute(VPTransformState &State) override;
2452
2453 /// Get the factor that the VF of this recipe's output should be scaled by.
2454 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2455
2456 /// Returns the number of incoming values, also number of incoming blocks.
2457 /// A reduction phi has two incoming values: the start value and the
2458 /// incoming value from the backedge.
2459 unsigned getNumIncoming() const override { return 2; }
2460
2461 /// Returns the recurrence kind of the reduction.
2462 RecurKind getRecurrenceKind() const { return Kind; }
2463
2464 /// Returns true, if the phi is part of an ordered reduction.
2465 bool isOrdered() const { return IsOrdered; }
2466
2467 /// Returns true, if the phi is part of an in-loop reduction.
2468 bool isInLoop() const { return IsInLoop; }
2469
2470 /// Returns true if the recipe only uses the first lane of operand \p Op.
 /// This is the case for ordered and in-loop reductions.
2471 bool usesFirstLaneOnly(const VPValue *Op) const override {
2473 "Op must be an operand of the recipe");
2474 return isOrdered() || isInLoop();
2475 }
2476
2477protected:
2478#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2479 /// Print the recipe.
2480 void printRecipe(raw_ostream &O, const Twine &Indent,
2481 VPSlotTracker &SlotTracker) const override;
2482#endif
2483};
2484
2485/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2486/// instructions.
2488public:
2489 /// The blend operation is a User of the incoming values and of their
2490 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2491 /// be omitted (implied by passing an odd number of operands) in which case
2492 /// all other incoming values are merged into it.
2494 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2495 assert(Operands.size() > 0 && "Expected at least one operand!");
2496 }
2497
2502
2503 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2504
2505 /// A normalized blend is one that has an odd number of operands, whereby the
2506 /// first operand does not have an associated mask.
2507 bool isNormalized() const { return getNumOperands() % 2; }
2508
2509 /// Return the number of incoming values, taking into account when normalized
2510 /// the first incoming value will have no mask.
2511 unsigned getNumIncomingValues() const {
 // Adding 1 for a normalized blend accounts for the omitted first mask.
2512 return (getNumOperands() + isNormalized()) / 2;
2513 }
2514
2515 /// Return incoming value number \p Idx.
2516 VPValue *getIncomingValue(unsigned Idx) const {
 // In a normalized blend every operand after the first is shifted down by
 // one, because the first mask is omitted.
2517 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2518 }
2519
2520 /// Return mask number \p Idx. A normalized blend has no mask for index 0.
2521 VPValue *getMask(unsigned Idx) const {
2522 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2523 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2524 }
2525
2526 /// Set mask number \p Idx to \p V. A normalized blend has no mask for
 /// index 0.
2527 void setMask(unsigned Idx, VPValue *V) {
2528 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2529 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2530 }
2531
 /// Blend recipes must be expanded before execution; reaching this is a bug.
2532 void execute(VPTransformState &State) override {
2533 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2534 }
2535
2536 /// Return the cost of this VPBlendRecipe.
2537 InstructionCost computeCost(ElementCount VF,
2538 VPCostContext &Ctx) const override;
2539
2540 /// Returns true if the recipe only uses the first lane of operand \p Op.
2541 bool usesFirstLaneOnly(const VPValue *Op) const override {
2543 "Op must be an operand of the recipe");
2544 // Recursing through Blend recipes only, must terminate at header phis at
2545 // the latest.
2546 return all_of(users(),
2547 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2548 }
2549
2550protected:
2551#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2552 /// Print the recipe.
2553 void printRecipe(raw_ostream &O, const Twine &Indent,
2554 VPSlotTracker &SlotTracker) const override;
2555#endif
2556};
2557
2558/// A common base class for interleaved memory operations.
2559/// An Interleaved memory operation is a memory access method that combines
2560/// multiple strided loads/stores into a single wide load/store with shuffles.
2561/// The first operand is the start address. The optional operands are, in order,
2562/// the stored values and the mask.
2564 public VPIRMetadata {
2566
2567 /// Indicates if the interleave group is in a conditional block and requires a
2568 /// mask.
2569 bool HasMask = false;
2570
2571 /// Indicates if gaps between members of the group need to be masked out or if
2572 /// unused gaps can be loaded speculatively.
2573 bool NeedsMaskForGaps = false;
2574
2575protected:
 /// Create the recipe with the leading \p Operands, followed by
 /// \p StoredValues and, if non-null, \p Mask as the last operand.
2576 VPInterleaveBase(const unsigned char SC,
2578 ArrayRef<VPValue *> Operands,
2579 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2580 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2581 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2582 NeedsMaskForGaps(NeedsMaskForGaps) {
2583 // TODO: extend the masked interleaved-group support to reversed access.
2584 assert((!Mask || !IG->isReverse()) &&
2585 "Reversed masked interleave-group not supported.");
 // Create a VPValue defined by this recipe for every group member whose
 // type is not void.
2586 for (unsigned I = 0; I < IG->getFactor(); ++I)
2587 if (Instruction *Inst = IG->getMember(I)) {
2588 if (Inst->getType()->isVoidTy())
2589 continue;
2590 new VPValue(Inst, this);
2591 }
2592
2593 for (auto *SV : StoredValues)
2594 addOperand(SV);
 // The mask, if any, must be added last; getMask() reads the last operand.
2595 if (Mask) {
2596 HasMask = true;
2597 addOperand(Mask);
2598 }
2599 }
2600
2601public:
2602 VPInterleaveBase *clone() override = 0;
2603
 /// Method to support type inquiry through isa, cast, and dyn_cast. Matches
 /// both the plain and the EVL interleave recipes.
2604 static inline bool classof(const VPRecipeBase *R) {
2605 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2606 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2607 }
2608
2609 static inline bool classof(const VPUser *U) {
2610 auto *R = dyn_cast<VPRecipeBase>(U);
2611 return R && classof(R);
2612 }
2613
2614 /// Return the address accessed by this recipe.
2615 VPValue *getAddr() const {
2616 return getOperand(0); // Address is the 1st, mandatory operand.
2617 }
2618
2619 /// Return the mask used by this recipe. Note that a full mask is represented
2620 /// by a nullptr.
2621 VPValue *getMask() const {
2622 // Mask is optional and the last operand.
2623 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2624 }
2625
2626 /// Return true if the access needs a mask because of the gaps.
2627 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2628
2630
 /// Return the insert position of the interleave group.
2631 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2632
 /// Must be overridden by concrete interleave recipes; reaching this is a bug.
2633 void execute(VPTransformState &State) override {
2634 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2635 }
2636
2637 /// Return the cost of this recipe.
2638 InstructionCost computeCost(ElementCount VF,
2639 VPCostContext &Ctx) const override;
2640
2641 /// Returns true if the recipe only uses the first lane of operand \p Op.
2642 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2643
2644 /// Returns the number of stored operands of this interleave group. Returns 0
2645 /// for load interleave groups.
2646 virtual unsigned getNumStoreOperands() const = 0;
2647
2648 /// Return the VPValues stored by this interleave group. If it is a load
2649 /// interleave group, return an empty ArrayRef.
 /// The stored values sit at the end of the operand list, directly before the
 /// optional mask.
2651 return ArrayRef<VPValue *>(op_end() -
2652 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2654 }
2655};
2656
2657/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2658/// or stores into one wide load/store and shuffles. The first operand of a
2659/// VPInterleave recipe is the address, followed by the stored values, followed
2660/// by an optional mask.
2662public:
2664 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2665 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2666 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2667 NeedsMaskForGaps, MD, DL) {}
2668
2669 ~VPInterleaveRecipe() override = default;
2670
2674 needsMaskForGaps(), *this, getDebugLoc());
2675 }
2676
2677 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2678
2679 /// Generate the wide load or store, and shuffles.
2680 void execute(VPTransformState &State) override;
2681
 /// Returns true if the recipe only uses the first lane of operand \p Op. This
 /// holds for the address, unless the same value is also one of the stored
 /// values.
2682 bool usesFirstLaneOnly(const VPValue *Op) const override {
2684 "Op must be an operand of the recipe");
2685 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2686 }
2687
 /// Returns the number of stored values: all operands except the address and
 /// the optional mask.
2688 unsigned getNumStoreOperands() const override {
2689 return getNumOperands() - (getMask() ? 2 : 1);
2690 }
2691
2692protected:
2693#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2694 /// Print the recipe.
2695 void printRecipe(raw_ostream &O, const Twine &Indent,
2696 VPSlotTracker &SlotTracker) const override;
2697#endif
2698};
2699
2700/// A recipe for interleaved memory operations with vector-predication
2701/// intrinsics. The first operand is the address, the second operand is the
2702/// explicit vector length. Stored values and mask are optional operands.
2704public:
2706 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2707 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2708 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2709 R.getDebugLoc()) {
2710 assert(!getInterleaveGroup()->isReverse() &&
2711 "Reversed interleave-group with tail folding is not supported.");
2712 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2713 "supported for scalable vector.");
2714 }
2715
2716 ~VPInterleaveEVLRecipe() override = default;
2717
2719 llvm_unreachable("cloning not implemented yet");
2720 }
2721
2722 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2723
2724 /// The VPValue of the explicit vector length.
2725 VPValue *getEVL() const { return getOperand(1); }
2726
2727 /// Generate the wide load or store, and shuffles.
2728 void execute(VPTransformState &State) override;
2729
2730 /// The recipe only uses the first lane of the address, and EVL operand.
 /// The address does not qualify if it is also one of the stored values.
2731 bool usesFirstLaneOnly(const VPValue *Op) const override {
2733 "Op must be an operand of the recipe");
2734 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2735 Op == getEVL();
2736 }
2737
 /// Returns the number of stored values: all operands except the address, the
 /// EVL and the optional mask.
2738 unsigned getNumStoreOperands() const override {
2739 return getNumOperands() - (getMask() ? 3 : 2);
2740 }
2741
2742protected:
2743#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2744 /// Print the recipe.
2745 void printRecipe(raw_ostream &O, const Twine &Indent,
2746 VPSlotTracker &SlotTracker) const override;
2747#endif
2748};
2749
2750/// A recipe to represent inloop reduction operations, performing a reduction on
2751/// a vector operand into a scalar value, and adding the result to a chain.
2752/// The Operands are {ChainOp, VecOp, [Condition]}.
2754 /// The recurrence kind for the reduction in question.
2755 RecurKind RdxKind;
 /// Whether the reduction is ordered.
2756 bool IsOrdered;
2757 /// Whether the reduction is conditional.
2758 bool IsConditional = false;
2759
2760protected:
 /// Constructor shared with derived recipes. \p CondOp, if non-null, is
 /// appended after \p Operands, making it the last operand.
2761 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2763 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2764 bool IsOrdered, DebugLoc DL)
2765 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2766 IsOrdered(IsOrdered) {
2767 if (CondOp) {
2768 IsConditional = true;
2769 addOperand(CondOp);
2770 }
2772 }
2773
2774public:
2776 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2777 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2778 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2779 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2780 IsOrdered, DL) {}
2781
2783 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2784 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2785 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2786 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2787 IsOrdered, DL) {}
2788
2789 ~VPReductionRecipe() override = default;
2790
2792 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2794 getCondOp(), IsOrdered, getDebugLoc());
2795 }
2796
 /// Method to support type inquiry through isa, cast, and dyn_cast. Matches
 /// the plain, EVL and partial reduction recipes.
2797 static inline bool classof(const VPRecipeBase *R) {
2798 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2799 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2800 R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
2801 }
2802
2803 static inline bool classof(const VPUser *U) {
2804 auto *R = dyn_cast<VPRecipeBase>(U);
2805 return R && classof(R);
2806 }
2807
 /// Returns false if \p VPV has no defining recipe.
2808 static inline bool classof(const VPValue *VPV) {
2809 const VPRecipeBase *R = VPV->getDefiningRecipe();
2810 return R && classof(R);
2811 }
2812
2813 static inline bool classof(const VPSingleDefRecipe *R) {
2814 return classof(static_cast<const VPRecipeBase *>(R));
2815 }
2816
2817 /// Generate the reduction in the loop.
2818 void execute(VPTransformState &State) override;
2819
2820 /// Return the cost of VPReductionRecipe.
2821 InstructionCost computeCost(ElementCount VF,
2822 VPCostContext &Ctx) const override;
2823
2824 /// Return the recurrence kind for the in-loop reduction.
2825 RecurKind getRecurrenceKind() const { return RdxKind; }
2826 /// Return true if the in-loop reduction is ordered.
2827 bool isOrdered() const { return IsOrdered; };
2828 /// Return true if the in-loop reduction is conditional.
2829 bool isConditional() const { return IsConditional; };
2830 /// The VPValue of the scalar Chain being accumulated.
2831 VPValue *getChainOp() const { return getOperand(0); }
2832 /// The VPValue of the vector value to be reduced.
2833 VPValue *getVecOp() const { return getOperand(1); }
2834 /// The VPValue of the condition for the block. This is the last operand, or
 /// nullptr if the reduction is not conditional.
2836 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2837 }
2838
2839protected:
2840#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2841 /// Print the recipe.
2842 void printRecipe(raw_ostream &O, const Twine &Indent,
2843 VPSlotTracker &SlotTracker) const override;
2844#endif
2845};
2846
2847/// A recipe for forming partial reductions. In the loop, an accumulator and
2848/// vector operand are added together and passed to the next iteration as the
2849/// next accumulator. After the loop body, the accumulator is reduced to a
2850/// scalar value.
 /// The opcode of the binary op, as returned by getOpcode().
2852 unsigned Opcode;
2853
2854 /// The divisor by which the VF of this recipe's output should be divided
2855 /// during execution.
2856 unsigned VFScaleFactor;
2857
2858public:
2860 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2861 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2862 VFScaleFactor, ReductionInst) {}
 /// Create a partial reduction with an explicit \p Opcode. The recipe is
 /// created with recurrence kind Add, default fast-math flags and as not
 /// ordered; \p Op0 and \p Op1 are operands 0 and 1.
2863 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2864 VPValue *Cond, unsigned ScaleFactor,
2865 Instruction *ReductionInst = nullptr)
2866 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2867 FastMathFlags(), ReductionInst,
2868 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2869 Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2870 [[maybe_unused]] auto *AccumulatorRecipe =
2872 // When cloning as part of a VPExpressionRecipe the chain op could have
2873 // been replaced by a temporary VPValue, so it doesn't have a defining
 // recipe.
2874 assert((!AccumulatorRecipe ||
2875 isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2876 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2877 "Unexpected operand order for partial reduction recipe");
2878 }
2879 ~VPPartialReductionRecipe() override = default;
2880
2882 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2883 getCondOp(), VFScaleFactor,
2885 }
2886
2887 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2888
2889 /// Generate the reduction in the loop.
2890 void execute(VPTransformState &State) override;
2891
2892 /// Return the cost of this VPPartialReductionRecipe.
2894 VPCostContext &Ctx) const override;
2895
2896 /// Get the binary op's opcode.
2897 unsigned getOpcode() const { return Opcode; }
2898
2899 /// Get the factor that the VF of this recipe's output should be scaled by.
2900 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2901
2902protected:
2903#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2904 /// Print the recipe.
2905 void printRecipe(raw_ostream &O, const Twine &Indent,
2906 VPSlotTracker &SlotTracker) const override;
2907#endif
2908};
2909
2910/// A recipe to represent inloop reduction operations with vector-predication
2911/// intrinsics, performing a reduction on a vector operand with the explicit
2912/// vector length (EVL) into a scalar value, and adding the result to a chain.
2913/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2915public:
2919 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2920 R.getFastMathFlags(),
2922 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2923 R.isOrdered(), DL) {}
2924
2925 ~VPReductionEVLRecipe() override = default;
2926
2928 llvm_unreachable("cloning not implemented yet");
2929 }
2930
2931 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2932
2933 /// Generate the reduction in the loop.
2934 void execute(VPTransformState &State) override;
2935
2936 /// The VPValue of the explicit vector length, i.e. operand 2.
2937 VPValue *getEVL() const { return getOperand(2); }
2938
2939 /// Returns true if the recipe only uses the first lane of operand \p Op.
 /// This holds for the EVL operand only.
2940 bool usesFirstLaneOnly(const VPValue *Op) const override {
2942 "Op must be an operand of the recipe");
2943 return Op == getEVL();
2944 }
2945
2946protected:
2947#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2948 /// Print the recipe.
2949 void printRecipe(raw_ostream &O, const Twine &Indent,
2950 VPSlotTracker &SlotTracker) const override;
2951#endif
2952};
2953
2954/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2955/// copies of the original scalar type, one per lane, instead of producing a
2956/// single copy of widened type for all lanes. If the instruction is known to be
2957/// a single scalar, only one copy, per lane zero, will be generated.
2959 public VPIRMetadata {
2960 /// Indicator if only a single replica per lane is needed.
2961 bool IsSingleScalar;
2962
2963 /// Indicator if the replicas are also predicated.
2964 bool IsPredicated;
2965
2966public:
2968 bool IsSingleScalar, VPValue *Mask = nullptr,
2969 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2970 DebugLoc DL = DebugLoc::getUnknown())
2971 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2972 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2973 IsPredicated(Mask) {
2974 setUnderlyingValue(I);
2975 if (Mask)
2976 addOperand(Mask);
2977 }
2978
2979 ~VPReplicateRecipe() override = default;
2980
2982 auto *Copy = new VPReplicateRecipe(
2983 getUnderlyingInstr(), operands(), IsSingleScalar,
2984 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2985 Copy->transferFlags(*this);
2986 return Copy;
2987 }
2988
2989 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2990
2991 /// Generate replicas of the desired Ingredient. Replicas will be generated
2992 /// for all parts and lanes unless a specific part and lane are specified in
2993 /// the \p State.
2994 void execute(VPTransformState &State) override;
2995
2996 /// Return the cost of this VPReplicateRecipe.
2997 InstructionCost computeCost(ElementCount VF,
2998 VPCostContext &Ctx) const override;
2999
3000 bool isSingleScalar() const { return IsSingleScalar; }
3001
3002 bool isPredicated() const { return IsPredicated; }
3003
3004 /// Returns true if the recipe only uses the first lane of operand \p Op.
3005 bool usesFirstLaneOnly(const VPValue *Op) const override {
3007 "Op must be an operand of the recipe");
3008 return isSingleScalar();
3009 }
3010
3011 /// Returns true if the recipe uses scalars of operand \p Op.
3012 bool usesScalars(const VPValue *Op) const override {
3014 "Op must be an operand of the recipe");
3015 return true;
3016 }
3017
3018 /// Returns true if the recipe is used by a widened recipe via an intervening
3019 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3020 /// in a vector.
3021 bool shouldPack() const;
3022
3023 /// Return the mask of a predicated VPReplicateRecipe.
3025 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3026 return getOperand(getNumOperands() - 1);
3027 }
3028
3029 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3030
3031protected:
3032#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3033 /// Print the recipe.
3034 void printRecipe(raw_ostream &O, const Twine &Indent,
3035 VPSlotTracker &SlotTracker) const override;
3036#endif
3037};
3038
3039/// A recipe for generating conditional branches on the bits of a mask.
3041public:
3043 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3044
3047 }
3048
3049 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3050
3051 /// Generate the extraction of the appropriate bit from the block mask and the
3052 /// conditional branch.
3053 void execute(VPTransformState &State) override;
3054
3055 /// Return the cost of this VPBranchOnMaskRecipe.
3056 InstructionCost computeCost(ElementCount VF,
3057 VPCostContext &Ctx) const override;
3058
3059#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3060 /// Print the recipe.
3061 void printRecipe(raw_ostream &O, const Twine &Indent,
3062 VPSlotTracker &SlotTracker) const override {
3063 O << Indent << "BRANCH-ON-MASK ";
3065 }
3066#endif
3067
3068 /// Returns true if the recipe uses scalars of operand \p Op.
3069 bool usesScalars(const VPValue *Op) const override {
3071 "Op must be an operand of the recipe");
3072 return true;
3073 }
3074};
3075
3076/// A recipe to combine multiple recipes into a single 'expression' recipe,
3077/// which should be considered a single entity for cost-modeling and transforms.
3078/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3079/// expression recipes, before execute. The individual expression recipes are
3080/// completely disconnected from the def-use graph of other recipes not part of
3081/// the expression. Def-use edges between pairs of expression recipes remain
3082/// intact, whereas every edge between an expression recipe and a recipe outside
3083/// the expression is elevated to connect the non-expression recipe with the
3084/// VPExpressionRecipe itself.
3085class VPExpressionRecipe : public VPSingleDefRecipe {
3086 /// Recipes included in this VPExpressionRecipe. This could contain
3087 /// duplicates.
3088 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3089
3090 /// Temporary VPValues used for external operands of the expression, i.e.
3091 /// operands not defined by recipes in the expression.
3092 SmallVector<VPValue *> LiveInPlaceholders;
3093
3094 enum class ExpressionTypes {
3095 /// Represents an inloop extended reduction operation, performing a
3096 /// reduction on an extended vector operand into a scalar value, and adding
3097 /// the result to a chain.
3098 ExtendedReduction,
3099 /// Represent an inloop multiply-accumulate reduction, multiplying the
3100 /// extended vector operands, performing a reduction.add on the result, and
3101 /// adding the scalar result to a chain.
3102 ExtMulAccReduction,
3103 /// Represent an inloop multiply-accumulate reduction, multiplying the
3104 /// vector operands, performing a reduction.add on the result, and adding
3105 /// the scalar result to a chain.
3106 MulAccReduction,
3107 /// Represent an inloop multiply-accumulate reduction, multiplying the
3108 /// extended vector operands, negating the multiplication, performing a
3109 /// reduction.add on the result, and adding the scalar result to a chain.
3110 ExtNegatedMulAccReduction,
3111 };
3112
3113 /// Type of the expression.
3114 ExpressionTypes ExpressionType;
3115
3116 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3117 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3118 /// in the expression) are replaced by temporary VPValues and the original
3119 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3120 /// as needed (excluding last) to ensure they are only used by other recipes
3121 /// in the expression.
3122 VPExpressionRecipe(ExpressionTypes ExpressionType,
3123 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3124
3125public:
3127 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3129 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3132 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3133 {Ext0, Ext1, Mul, Red}) {}
3136 VPReductionRecipe *Red)
3137 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3138 {Ext0, Ext1, Mul, Sub, Red}) {
3139 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3140 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3141 "Expected an add reduction");
3142 assert(getNumOperands() >= 3 && "Expected at least three operands");
3143 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3144 assert(SubConst && SubConst->getValue() == 0 &&
3145 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3146 }
3147
3149 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3150 for (auto *R : reverse(ExpressionRecipes)) {
3151 if (ExpressionRecipesSeen.insert(R).second)
3152 delete R;
3153 }
3154 for (VPValue *T : LiveInPlaceholders)
3155 delete T;
3156 }
3157
3158 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3159
3160 VPExpressionRecipe *clone() override {
3161 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3162 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3163 for (auto *R : ExpressionRecipes)
3164 NewExpressiondRecipes.push_back(R->clone());
3165 for (auto *New : NewExpressiondRecipes) {
3166 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3167 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3168 // Update placeholder operands in the cloned recipe to use the external
3169 // operands, to be internalized when the cloned expression is constructed.
3170 for (const auto &[Placeholder, OutsideOp] :
3171 zip(LiveInPlaceholders, operands()))
3172 New->replaceUsesOfWith(Placeholder, OutsideOp);
3173 }
3174 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3175 }
3176
3177 /// Return the VPValue to use to infer the result type of the recipe.
3179 unsigned OpIdx =
3180 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3181 : 1;
3182 return getOperand(getNumOperands() - OpIdx);
3183 }
3184
3185 /// Insert the recipes of the expression back into the VPlan, directly before
3186 /// the current recipe. Leaves the expression recipe empty, which must be
3187 /// removed before codegen.
3188 void decompose();
3189
3190 unsigned getVFScaleFactor() const {
3191 auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
3192 return PR ? PR->getVFScaleFactor() : 1;
3193 }
3194
3195 /// Method for generating code, must not be called as this recipe is abstract.
3196 void execute(VPTransformState &State) override {
3197 llvm_unreachable("recipe must be removed before execute");
3198 }
3199
3201 VPCostContext &Ctx) const override;
3202
3203 /// Returns true if this expression contains recipes that may read from or
3204 /// write to memory.
3205 bool mayReadOrWriteMemory() const;
3206
3207 /// Returns true if this expression contains recipes that may have side
3208 /// effects.
3209 bool mayHaveSideEffects() const;
3210
3211 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3212 bool isSingleScalar() const;
3213
3214protected:
3215#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3216 /// Print the recipe.
3217 void printRecipe(raw_ostream &O, const Twine &Indent,
3218 VPSlotTracker &SlotTracker) const override;
3219#endif
3220};
3221
3222/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3223/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3224/// order to merge values that are set under such a branch and feed their uses.
3225/// The phi nodes can be scalar or vector depending on the users of the value.
3226/// This recipe works in concert with VPBranchOnMaskRecipe.
3228public:
3229 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3230 /// nodes after merging back from a Branch-on-Mask.
3232 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3233 ~VPPredInstPHIRecipe() override = default;
3234
3236 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3237 }
3238
3239 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3240
3241 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3242 /// retain SSA form.
3243 void execute(VPTransformState &State) override;
3244
3245 /// Return the cost of this VPPredInstPHIRecipe.
3247 VPCostContext &Ctx) const override {
3248 // TODO: Compute accurate cost after retiring the legacy cost model.
3249 return 0;
3250 }
3251
3252 /// Returns true if the recipe uses scalars of operand \p Op.
3253 bool usesScalars(const VPValue *Op) const override {
3255 "Op must be an operand of the recipe");
3256 return true;
3257 }
3258
3259protected:
3260#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3261 /// Print the recipe.
3262 void printRecipe(raw_ostream &O, const Twine &Indent,
3263 VPSlotTracker &SlotTracker) const override;
3264#endif
3265};
3266
3267/// A common base class for widening memory operations. An optional mask can be
3268/// provided as the last operand.
3270 public VPIRMetadata {
3271protected:
3273
3274 /// Alignment information for this memory access.
3276
3277 /// Whether the accessed addresses are consecutive.
3279
3280 /// Whether the consecutive accessed addresses are in reverse order.
3282
3283 /// Whether the memory access is masked.
3284 bool IsMasked = false;
3285
3286 void setMask(VPValue *Mask) {
3287 assert(!IsMasked && "cannot re-set mask");
3288 if (!Mask)
3289 return;
3290 addOperand(Mask);
3291 IsMasked = true;
3292 }
3293
3294 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3295 std::initializer_list<VPValue *> Operands,
3296 bool Consecutive, bool Reverse,
3297 const VPIRMetadata &Metadata, DebugLoc DL)
3298 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3300 Reverse(Reverse) {
3301 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3303 "Reversed acccess without VPVectorEndPointerRecipe address?");
3304 }
3305
3306public:
3308 llvm_unreachable("cloning not supported");
3309 }
3310
3311 static inline bool classof(const VPRecipeBase *R) {
3312 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3313 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3314 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3315 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3316 }
3317
3318 static inline bool classof(const VPUser *U) {
3319 auto *R = dyn_cast<VPRecipeBase>(U);
3320 return R && classof(R);
3321 }
3322
3323 /// Return whether the loaded-from / stored-to addresses are consecutive.
3324 bool isConsecutive() const { return Consecutive; }
3325
3326 /// Return whether the consecutive loaded/stored addresses are in reverse
3327 /// order.
3328 bool isReverse() const { return Reverse; }
3329
3330 /// Return the address accessed by this recipe.
3331 VPValue *getAddr() const { return getOperand(0); }
3332
3333 /// Returns true if the recipe is masked.
3334 bool isMasked() const { return IsMasked; }
3335
3336 /// Return the mask used by this recipe. Note that a full mask is represented
3337 /// by a nullptr.
3338 VPValue *getMask() const {
3339 // Mask is optional and therefore the last operand.
3340 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3341 }
3342
3343 /// Returns the alignment of the memory access.
3344 Align getAlign() const { return Alignment; }
3345
3346 /// Generate the wide load/store.
3347 void execute(VPTransformState &State) override {
3348 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3349 }
3350
3351 /// Return the cost of this VPWidenMemoryRecipe.
3352 InstructionCost computeCost(ElementCount VF,
3353 VPCostContext &Ctx) const override;
3354
3356};
3357
3358/// A recipe for widening load operations, using the address to load from and an
3359/// optional mask.
3361 public VPValue {
3363 bool Consecutive, bool Reverse,
3364 const VPIRMetadata &Metadata, DebugLoc DL)
3365 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3366 Reverse, Metadata, DL),
3367 VPValue(this, &Load) {
3368 setMask(Mask);
3369 }
3370
3373 getMask(), Consecutive, Reverse, *this,
3374 getDebugLoc());
3375 }
3376
3377 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3378
3379 /// Generate a wide load or gather.
3380 void execute(VPTransformState &State) override;
3381
3382 /// Returns true if the recipe only uses the first lane of operand \p Op.
3383 bool usesFirstLaneOnly(const VPValue *Op) const override {
3385 "Op must be an operand of the recipe");
3386 // Widened, consecutive loads operations only demand the first lane of
3387 // their address.
3388 return Op == getAddr() && isConsecutive();
3389 }
3390
3391protected:
3392#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3393 /// Print the recipe.
3394 void printRecipe(raw_ostream &O, const Twine &Indent,
3395 VPSlotTracker &SlotTracker) const override;
3396#endif
3397};
3398
3399/// A recipe for widening load operations with vector-predication intrinsics,
3400/// using the address to load from, the explicit vector length and an optional
3401/// mask.
3402struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3404 VPValue *Mask)
3405 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3406 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3407 L.getDebugLoc()),
3408 VPValue(this, &getIngredient()) {
3409 setMask(Mask);
3410 }
3411
3412 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3413
3414 /// Return the EVL operand.
3415 VPValue *getEVL() const { return getOperand(1); }
3416
3417 /// Generate the wide load or gather.
3418 void execute(VPTransformState &State) override;
3419
3420 /// Return the cost of this VPWidenLoadEVLRecipe.
3422 VPCostContext &Ctx) const override;
3423
3424 /// Returns true if the recipe only uses the first lane of operand \p Op.
3425 bool usesFirstLaneOnly(const VPValue *Op) const override {
3427 "Op must be an operand of the recipe");
3428 // Widened loads only demand the first lane of EVL and consecutive loads
3429 // only demand the first lane of their address.
3430 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3431 }
3432
3433protected:
3434#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3435 /// Print the recipe.
3436 void printRecipe(raw_ostream &O, const Twine &Indent,
3437 VPSlotTracker &SlotTracker) const override;
3438#endif
3439};
3440
3441/// A recipe for widening store operations, using the stored value, the address
3442/// to store to and an optional mask.
3444 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3445 VPValue *Mask, bool Consecutive, bool Reverse,
3446 const VPIRMetadata &Metadata, DebugLoc DL)
3447 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3448 Consecutive, Reverse, Metadata, DL) {
3449 setMask(Mask);
3450 }
3451
3457
3458 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3459
3460 /// Return the value stored by this recipe.
3461 VPValue *getStoredValue() const { return getOperand(1); }
3462
3463 /// Generate a wide store or scatter.
3464 void execute(VPTransformState &State) override;
3465
3466 /// Returns true if the recipe only uses the first lane of operand \p Op.
3467 bool usesFirstLaneOnly(const VPValue *Op) const override {
3469 "Op must be an operand of the recipe");
3470 // Widened, consecutive stores only demand the first lane of their address,
3471 // unless the same operand is also stored.
3472 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3473 }
3474
3475protected:
3476#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3477 /// Print the recipe.
3478 void printRecipe(raw_ostream &O, const Twine &Indent,
3479 VPSlotTracker &SlotTracker) const override;
3480#endif
3481};
3482
3483/// A recipe for widening store operations with vector-predication intrinsics,
3484/// using the value to store, the address to store to, the explicit vector
3485/// length and an optional mask.
3488 VPValue *Mask)
3489 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3490 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3491 S.isReverse(), S, S.getDebugLoc()) {
3492 setMask(Mask);
3493 }
3494
3495 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3496
3497 /// Return the value stored by this recipe.
3498 VPValue *getStoredValue() const { return getOperand(1); }
3499
3500 /// Return the EVL operand.
3501 VPValue *getEVL() const { return getOperand(2); }
3502
3503 /// Generate the wide store or scatter.
3504 void execute(VPTransformState &State) override;
3505
3506 /// Return the cost of this VPWidenStoreEVLRecipe.
3508 VPCostContext &Ctx) const override;
3509
3510 /// Returns true if the recipe only uses the first lane of operand \p Op.
3511 bool usesFirstLaneOnly(const VPValue *Op) const override {
3513 "Op must be an operand of the recipe");
3514 if (Op == getEVL()) {
3515 assert(getStoredValue() != Op && "unexpected store of EVL");
3516 return true;
3517 }
3518 // Widened, consecutive memory operations only demand the first lane of
3519 // their address, unless the same operand is also stored. That latter can
3520 // happen with opaque pointers.
3521 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3522 }
3523
3524protected:
3525#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3526 /// Print the recipe.
3527 void printRecipe(raw_ostream &O, const Twine &Indent,
3528 VPSlotTracker &SlotTracker) const override;
3529#endif
3530};
3531
3532/// Recipe to expand a SCEV expression.
3534 const SCEV *Expr;
3535
3536public:
3538 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3539
3540 ~VPExpandSCEVRecipe() override = default;
3541
3542 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3543
3544 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3545
3546 void execute(VPTransformState &State) override {
3547 llvm_unreachable("SCEV expressions must be expanded before final execute");
3548 }
3549
3550 /// Return the cost of this VPExpandSCEVRecipe.
3552 VPCostContext &Ctx) const override {
3553 // TODO: Compute accurate cost after retiring the legacy cost model.
3554 return 0;
3555 }
3556
3557 const SCEV *getSCEV() const { return Expr; }
3558
3559protected:
3560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3561 /// Print the recipe.
3562 void printRecipe(raw_ostream &O, const Twine &Indent,
3563 VPSlotTracker &SlotTracker) const override;
3564#endif
3565};
3566
3567/// Canonical scalar induction phi of the vector loop, starting at the specified
3568/// start value (either 0 or the resume value when vectorizing the epilogue
3569/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3570/// canonical induction variable.
3572public:
3574 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3575
3576 ~VPCanonicalIVPHIRecipe() override = default;
3577
3579 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3580 R->addOperand(getBackedgeValue());
3581 return R;
3582 }
3583
3584 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3585
3586 void execute(VPTransformState &State) override {
3587 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3588 "scalar phi recipe");
3589 }
3590
3591 /// Returns the scalar type of the induction.
3593 return getStartValue()->getLiveInIRValue()->getType();
3594 }
3595
3596 /// Returns true if the recipe only uses the first lane of operand \p Op.
3597 bool usesFirstLaneOnly(const VPValue *Op) const override {
3599 "Op must be an operand of the recipe");
3600 return true;
3601 }
3602
3603 /// Returns true if the recipe only uses the first part of operand \p Op.
3604 bool usesFirstPartOnly(const VPValue *Op) const override {
3606 "Op must be an operand of the recipe");
3607 return true;
3608 }
3609
3610 /// Return the cost of this VPCanonicalIVPHIRecipe.
3612 VPCostContext &Ctx) const override {
3613 // For now, match the behavior of the legacy cost model.
3614 return 0;
3615 }
3616
3617protected:
3618#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3619 /// Print the recipe.
3620 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3621 VPSlotTracker &SlotTracker) const override;
3622#endif
3623};
3624
3625/// A recipe for generating the active lane mask for the vector loop that is
3626/// used to predicate the vector operations.
3627/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3628/// remove VPActiveLaneMaskPHIRecipe.
3630public:
3632 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3633 DL) {}
3634
3635 ~VPActiveLaneMaskPHIRecipe() override = default;
3636
3639 if (getNumOperands() == 2)
3640 R->addOperand(getOperand(1));
3641 return R;
3642 }
3643
3644 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3645
3646 /// Generate the active lane mask phi of the vector loop.
3647 void execute(VPTransformState &State) override;
3648
3649protected:
3650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3651 /// Print the recipe.
3652 void printRecipe(raw_ostream &O, const Twine &Indent,
3653 VPSlotTracker &SlotTracker) const override;
3654#endif
3655};
3656
3657/// A recipe for generating the phi node for the current index of elements,
3658/// adjusted in accordance with EVL value. It starts at the start value of the
3659/// canonical induction and gets incremented by EVL in each iteration of the
3660/// vector loop.
3662public:
3664 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3665
3666 ~VPEVLBasedIVPHIRecipe() override = default;
3667
3669 llvm_unreachable("cloning not implemented yet");
3670 }
3671
3672 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3673
3674 void execute(VPTransformState &State) override {
3675 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3676 "scalar phi recipe");
3677 }
3678
3679 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3681 VPCostContext &Ctx) const override {
3682 // For now, match the behavior of the legacy cost model.
3683 return 0;
3684 }
3685
3686 /// Returns true if the recipe only uses the first lane of operand \p Op.
3687 bool usesFirstLaneOnly(const VPValue *Op) const override {
3689 "Op must be an operand of the recipe");
3690 return true;
3691 }
3692
3693protected:
3694#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3695 /// Print the recipe.
3696 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3697 VPSlotTracker &SlotTracker) const override;
3698#endif
3699};
3700
3701/// A Recipe for widening the canonical induction variable of the vector loop.
3703 public VPUnrollPartAccessor<1> {
3704public:
3706 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3707
3708 ~VPWidenCanonicalIVRecipe() override = default;
3709
3714
3715 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3716
3717 /// Generate a canonical vector induction variable of the vector loop, with
3718 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3719 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3720 void execute(VPTransformState &State) override;
3721
3722 /// Return the cost of this VPWidenCanonicalIVRecipe.
3724 VPCostContext &Ctx) const override {
3725 // TODO: Compute accurate cost after retiring the legacy cost model.
3726 return 0;
3727 }
3728
3729protected:
3730#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3731 /// Print the recipe.
3732 void printRecipe(raw_ostream &O, const Twine &Indent,
3733 VPSlotTracker &SlotTracker) const override;
3734#endif
3735};
3736
3737/// A recipe for converting the input value \p IV to the corresponding
3738/// value of an IV with different start and step values, using Start + IV *
3739/// Step.
3741 /// Kind of the induction.
3743 /// If not nullptr, the floating point induction binary operator. Must be set
3744 /// for floating point inductions.
3745 const FPMathOperator *FPBinOp;
3746
3747 /// Name to use for the generated IR instruction for the derived IV.
3748 std::string Name;
3749
3750public:
3752 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3753 const Twine &Name = "")
3755 IndDesc.getKind(),
3756 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3757 Start, CanonicalIV, Step, Name) {}
3758
3760 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3761 VPValue *Step, const Twine &Name = "")
3762 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3763 FPBinOp(FPBinOp), Name(Name.str()) {}
3764
3765 ~VPDerivedIVRecipe() override = default;
3766
3768 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3769 getStepValue());
3770 }
3771
3772 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3773
3774 /// Generate the transformed value of the induction at offset StartValue (1.
3775 /// operand) + IV (2. operand) * StepValue (3. operand).
3776 void execute(VPTransformState &State) override;
3777
3778 /// Return the cost of this VPDerivedIVRecipe.
3780 VPCostContext &Ctx) const override {
3781 // TODO: Compute accurate cost after retiring the legacy cost model.
3782 return 0;
3783 }
3784
3786 return getStartValue()->getLiveInIRValue()->getType();
3787 }
3788
3789 VPValue *getStartValue() const { return getOperand(0); }
3790 VPValue *getStepValue() const { return getOperand(2); }
3791
3792 /// Returns true if the recipe only uses the first lane of operand \p Op.
3793 bool usesFirstLaneOnly(const VPValue *Op) const override {
3795 "Op must be an operand of the recipe");
3796 return true;
3797 }
3798
3799protected:
3800#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3801 /// Print the recipe.
3802 void printRecipe(raw_ostream &O, const Twine &Indent,
3803 VPSlotTracker &SlotTracker) const override;
3804#endif
3805};
3806
3807/// A recipe for handling phi nodes of integer and floating-point inductions,
3808/// producing their scalar values.
3810 public VPUnrollPartAccessor<3> {
3811 Instruction::BinaryOps InductionOpcode;
3812
3813public:
3816 DebugLoc DL)
3817 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3818 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3819 InductionOpcode(Opcode) {}
3820
3822 VPValue *Step, VPValue *VF,
3825 IV, Step, VF, IndDesc.getInductionOpcode(),
3826 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3827 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3828 : FastMathFlags(),
3829 DL) {}
3830
3831 ~VPScalarIVStepsRecipe() override = default;
3832
3834 return new VPScalarIVStepsRecipe(
3835 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3837 getDebugLoc());
3838 }
3839
3840 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3841 /// this is only accurate after the VPlan has been unrolled.
3842 bool isPart0() const { return getUnrollPart(*this) == 0; }
3843
3844 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3845
3846 /// Generate the scalarized versions of the phi node as needed by their users.
3847 void execute(VPTransformState &State) override;
3848
3849 /// Return the cost of this VPScalarIVStepsRecipe.
3851 VPCostContext &Ctx) const override {
3852 // TODO: Compute accurate cost after retiring the legacy cost model.
3853 return 0;
3854 }
3855
3856 VPValue *getStepValue() const { return getOperand(1); }
3857
3858 /// Returns true if the recipe only uses the first lane of operand \p Op.
3859 bool usesFirstLaneOnly(const VPValue *Op) const override {
3861 "Op must be an operand of the recipe");
3862 return true;
3863 }
3864
3865protected:
3866#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3867 /// Print the recipe.
3868 void printRecipe(raw_ostream &O, const Twine &Indent,
3869 VPSlotTracker &SlotTracker) const override;
3870#endif
3871};
3872
3873/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3874/// types implementing VPPhiAccessors. Used by isa<> & co.
3876 static inline bool isPossible(const VPRecipeBase *f) {
3877 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3879 }
3880};
3881/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3882/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3883template <typename SrcTy>
3884struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3885
3887
3888 /// doCast is used by cast<>. The concrete recipe class to cast to is chosen
/// from the VPDef ID of \p R; the result is returned as VPPhiAccessors.
3889 static inline VPPhiAccessors *doCast(SrcTy R) {
// The lambda yields a pointer-to-const; the outer const_cast removes the
// const qualifier again to match the declared return type.
3890 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3891 switch (R->getVPDefID()) {
3892 case VPDef::VPInstructionSC:
3893 return cast<VPPhi>(R);
3894 case VPDef::VPIRInstructionSC:
3895 return cast<VPIRPhi>(R);
3896 case VPDef::VPWidenPHISC:
3897 return cast<VPWidenPHIRecipe>(R);
3898 default:
// Every other recipe ID is cast to VPHeaderPHIRecipe.
3899 return cast<VPHeaderPHIRecipe>(R);
3900 }
3901 }());
3902 }
3903
3904 /// doCastIfPossible is used by dyn_cast<>.
3905 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3906 if (!Self::isPossible(f))
3907 return nullptr;
3908 return doCast(f);
3909 }
3910};
3911template <>
3914template <>
3917
3918 /// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3919 /// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3920 namespace detail {
/// Down-cast \p R to the concrete recipe class selected by its VPDef ID and
/// return the result converted to \p DstTy. Recipe IDs that are not listed in
/// the switch reach llvm_unreachable.
3921 template <typename DstTy, typename RecipeBasePtrTy>
3922 static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3923 switch (R->getVPDefID()) {
3924 case VPDef::VPInstructionSC:
3925 return cast<VPInstruction>(R);
3926 case VPDef::VPWidenSC:
3927 return cast<VPWidenRecipe>(R);
3928 case VPDef::VPWidenCastSC:
3929 return cast<VPWidenCastRecipe>(R);
3930 case VPDef::VPWidenIntrinsicSC:
// NOTE(review): listing line 3931 is absent here, so as shown this case falls
// through to the VPWidenCallRecipe cast below. Presumably the original header
// has a dedicated return for intrinsic recipes - confirm against the source.
3932 case VPDef::VPWidenCallSC:
3933 return cast<VPWidenCallRecipe>(R);
3934 case VPDef::VPWidenSelectSC:
3935 return cast<VPWidenSelectRecipe>(R);
3936 case VPDef::VPReplicateSC:
3937 return cast<VPReplicateRecipe>(R);
// Both interleave IDs share the common VPInterleaveBase class.
3938 case VPDef::VPInterleaveSC:
3939 case VPDef::VPInterleaveEVLSC:
3940 return cast<VPInterleaveBase>(R);
// All widened load/store IDs share the common VPWidenMemoryRecipe class.
3941 case VPDef::VPWidenLoadSC:
3942 case VPDef::VPWidenLoadEVLSC:
3943 case VPDef::VPWidenStoreSC:
3944 case VPDef::VPWidenStoreEVLSC:
3945 return cast<VPWidenMemoryRecipe>(R);
3946 default:
3947 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3948 }
3949 }
3950 } // namespace detail
3951
3952/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3953/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3954template <typename DstTy, typename SrcTy>
3955struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3956 static inline bool isPossible(SrcTy R) {
3957 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3958 // also handled in castToVPIRMetadata.
3964 }
3965
3966 using RetTy = DstTy *;
3967
3968 /// doCast is used by cast<>.
3969 static inline RetTy doCast(SrcTy R) {
3971 }
3972
3973 /// doCastIfPossible is used by dyn_cast<>.
3974 static inline RetTy doCastIfPossible(SrcTy R) {
3975 if (!isPossible(R))
3976 return nullptr;
3977 return doCast(R);
3978 }
3979};
3980template <>
3983template <>
3986
3987/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3988/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3989/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3990class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3991 friend class VPlan;
3992
3993 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3994 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3995 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3996 if (Recipe)
3997 appendRecipe(Recipe);
3998 }
3999
4000public:
4002
4003protected:
4004 /// The VPRecipes held in the order of output instructions to generate.
4006
4007 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4008 : VPBlockBase(BlockSC, Name.str()) {}
4009
4010public:
/// Empty the recipe list by popping recipes from the back until none remain.
4011 ~VPBasicBlock() override {
4012 while (!Recipes.empty())
4013 Recipes.pop_back();
4014 }
4015
4016 /// Instruction iterators...
4021
4022 //===--------------------------------------------------------------------===//
4023 /// Recipe iterator methods
4024 ///
4025 inline iterator begin() { return Recipes.begin(); }
4026 inline const_iterator begin() const { return Recipes.begin(); }
4027 inline iterator end() { return Recipes.end(); }
4028 inline const_iterator end() const { return Recipes.end(); }
4029
4030 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4031 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4032 inline reverse_iterator rend() { return Recipes.rend(); }
4033 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4034
4035 inline size_t size() const { return Recipes.size(); }
4036 inline bool empty() const { return Recipes.empty(); }
4037 inline const VPRecipeBase &front() const { return Recipes.front(); }
4038 inline VPRecipeBase &front() { return Recipes.front(); }
4039 inline const VPRecipeBase &back() const { return Recipes.back(); }
4040 inline VPRecipeBase &back() { return Recipes.back(); }
4041
4042 /// Returns a reference to the list of recipes.
4044
4045 /// Returns a pointer to a member of the recipe list.
4046 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4047 return &VPBasicBlock::Recipes;
4048 }
4049
4050 /// Method to support type inquiry through isa, cast, and dyn_cast. Blocks
/// with the VPIRBasicBlockSC ID are accepted in addition to VPBasicBlockSC.
4051 static inline bool classof(const VPBlockBase *V) {
4052 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4053 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4054 }
4055
/// Insert \p Recipe into this block's recipe list at \p InsertPt and make this
/// block its parent. \p Recipe must be non-null and must not have a parent
/// yet.
4056 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4057 assert(Recipe && "No recipe to append.");
4058 assert(!Recipe->Parent && "Recipe already in VPlan");
4059 Recipe->Parent = this;
4060 Recipes.insert(InsertPt, Recipe);
4061 }
4062
4063 /// Augment the existing recipes of a VPBasicBlock with an additional
4064 /// \p Recipe as the last recipe.
4065 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4066
4067 /// The method which generates the output IR instructions that correspond to
4068 /// this VPBasicBlock, thereby "executing" the VPlan.
4069 void execute(VPTransformState *State) override;
4070
4071 /// Return the cost of this VPBasicBlock.
4072 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4073
4074 /// Return the position of the first non-phi node recipe in the block.
4075 iterator getFirstNonPhi();
4076
4077 /// Returns an iterator range over the PHI-like recipes in the block.
4081
4082 /// Split current block at \p SplitAt by inserting a new block between the
4083 /// current block and its successors and moving all recipes starting at
4084 /// SplitAt to the new block. Returns the new block.
4085 VPBasicBlock *splitAt(iterator SplitAt);
4086
4087 VPRegionBlock *getEnclosingLoopRegion();
4088 const VPRegionBlock *getEnclosingLoopRegion() const;
4089
4090 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4091 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4092 /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
4093 ///
4094 /// Note that the numbering is applied to the whole VPlan, so printing
4095 /// individual blocks is consistent with the whole VPlan printing.
4096 void print(raw_ostream &O, const Twine &Indent,
4097 VPSlotTracker &SlotTracker) const override;
4098 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4099 #endif
4100
4101 /// If the block has multiple successors, return the branch recipe terminating
4102 /// the block. If it has no successor or only a single one, return nullptr.
4103 VPRecipeBase *getTerminator();
4104 const VPRecipeBase *getTerminator() const;
4105
4106 /// Returns true if the block is exiting its parent region.
4107 bool isExiting() const;
4108
4109 /// Clone the current block and its recipes, without updating the operands of
4110 /// the cloned recipes.
4111 VPBasicBlock *clone() override;
4112
4113 /// Returns the predecessor block at index \p Idx with the predecessors as per
4114 /// the corresponding plain CFG. If the block is an entry block to a region,
4115 /// the first predecessor is the single predecessor of a region, and the
4116 /// second predecessor is the exiting block of the region.
4117 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4118
4119protected:
4120 /// Execute the recipes in the IR basic block \p BB.
4121 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4122
4123 /// Connect the predecessors of this VPBB in the VPlan CFG to the IR basic
4124 /// block generated for this VPBB.
4125 void connectToPredecessors(VPTransformState &State);
4126
4127private:
4128 /// Create an IR BasicBlock to hold the output instructions generated by this
4129 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4130 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4131};
4132
4133inline const VPBasicBlock *
4135 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4136}
4137
4138 /// A special type of VPBasicBlock that wraps an existing IR basic block.
4139 /// Recipes of the block get added before the first non-phi instruction in the
4140 /// wrapped block.
4141 /// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4142 /// preheader block.
/// NOTE(review): VPlan in this file also keeps VPIRBasicBlocks for the scalar
/// header and for the exit blocks, so the restriction above looks out of date
/// - confirm.
4143 class VPIRBasicBlock : public VPBasicBlock {
4144 friend class VPlan;
4145
/// The IR basic block wrapped by this VPIRBasicBlock.
4146 BasicBlock *IRBB;
4147
4148 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
/// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4149 VPIRBasicBlock(BasicBlock *IRBB)
4150 : VPBasicBlock(VPIRBasicBlockSC,
4151 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4152 IRBB(IRBB) {}
4153
4154 public:
4155 ~VPIRBasicBlock() override = default;
4156
/// Method to support type inquiry through isa, cast, and dyn_cast.
4157 static inline bool classof(const VPBlockBase *V) {
4158 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4159 }
4160
4161 /// The method which generates the output IR instructions that correspond to
4162 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4163 void execute(VPTransformState *State) override;
4164
/// Clone this block. Defined out of line.
4165 VPIRBasicBlock *clone() override;
4166
/// Return the wrapped IR basic block.
4167 BasicBlock *getIRBasicBlock() const { return IRBB; }
4168 };
4169
4170/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4171/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4172/// A VPRegionBlock may indicate that its contents are to be replicated several
4173/// times. This is designed to support predicated scalarization, in which a
4174/// scalar if-then code structure needs to be generated VF * UF times. Having
4175/// this replication indicator helps to keep a single model for multiple
4176/// candidate VF's. The actual replication takes place only once the desired VF
4177/// and UF have been determined.
4178class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4179 friend class VPlan;
4180
4181 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4182 VPBlockBase *Entry;
4183
4184 /// Hold the Single Exiting block of the SESE region modelled by the
4185 /// VPRegionBlock.
4186 VPBlockBase *Exiting;
4187
4188 /// An indicator whether this region is to generate multiple replicated
4189 /// instances of output IR corresponding to its VPBlockBases.
4190 bool IsReplicator;
4191
4192 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
/// NOTE(review): no createVPRegionBlock is visible in this listing; regions
/// are constructed by VPlan::createLoopRegion - confirm the factory name.
///
/// Construct a region with \p Entry and \p Exiting as its entry and exiting
/// blocks. Both are dereferenced unconditionally, so they must be non-null;
/// \p Entry must have no predecessors and \p Exiting no successors. Both
/// blocks get this region as their parent.
4193 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4194 const std::string &Name = "", bool IsReplicator = false)
4195 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4196 IsReplicator(IsReplicator) {
4197 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4198 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4199 Entry->setParent(this);
4200 Exiting->setParent(this);
4201 }
/// Construct a region named \p Name whose entry and exiting blocks are both
/// null; they can be provided later through setEntry() and setExiting().
4202 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4203 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4204 IsReplicator(IsReplicator) {}
4205
4206public:
4207 ~VPRegionBlock() override = default;
4208
4209 /// Method to support type inquiry through isa, cast, and dyn_cast.
4210 static inline bool classof(const VPBlockBase *V) {
4211 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4212 }
4213
/// Return the entry block of the region. This is null for a region that was
/// constructed without an entry and has not had setEntry() called yet.
4214 const VPBlockBase *getEntry() const { return Entry; }
4215 VPBlockBase *getEntry() { return Entry; }
4216
4217 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4218 /// EntryBlock must have no predecessors.
4219 void setEntry(VPBlockBase *EntryBlock) {
4220 assert(EntryBlock->getPredecessors().empty() &&
4221 "Entry block cannot have predecessors.");
4222 Entry = EntryBlock;
4223 EntryBlock->setParent(this);
4224 }
4225
/// Return the exiting block of the region. This is null for a region that was
/// constructed without an exiting block and has not had setExiting() called
/// yet.
4226 const VPBlockBase *getExiting() const { return Exiting; }
4227 VPBlockBase *getExiting() { return Exiting; }
4228
4229 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4230 /// ExitingBlock must have no successors.
4231 void setExiting(VPBlockBase *ExitingBlock) {
4232 assert(ExitingBlock->getSuccessors().empty() &&
4233 "Exit block cannot have successors.");
4234 Exiting = ExitingBlock;
4235 ExitingBlock->setParent(this);
4236 }
4237
4238 /// Returns the pre-header VPBasicBlock of the loop region.
4240 assert(!isReplicator() && "should only get pre-header of loop regions");
4241 return getSinglePredecessor()->getExitingBasicBlock();
4242 }
4243
4244 /// An indicator whether this region is to generate multiple replicated
4245 /// instances of output IR corresponding to its VPBlockBases.
4246 bool isReplicator() const { return IsReplicator; }
4247
4248 /// The method which generates the output IR instructions that correspond to
4249 /// this VPRegionBlock, thereby "executing" the VPlan.
4250 void execute(VPTransformState *State) override;
4251
4252 /// Return the cost of this region.
4253 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4254
4255 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4256 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4257 /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
4258 /// consecutive numbers.
4259 ///
4260 /// Note that the numbering is applied to the whole VPlan, so printing
4261 /// individual regions is consistent with the whole VPlan printing.
4262 void print(raw_ostream &O, const Twine &Indent,
4263 VPSlotTracker &SlotTracker) const override;
4264 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4265 #endif
4266
4267 /// Clone all blocks in the single-entry single-exit region of the block and
4268 /// their recipes without updating the operands of the cloned recipes.
4269 VPRegionBlock *clone() override;
4270
4271 /// Remove the current region from its VPlan, connecting its predecessor to
4272 /// its entry, and its exiting block to its successor.
4273 void dissolveToCFGLoop();
4274
4275 /// Returns the canonical induction recipe of the region.
4277 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4278 if (EntryVPBB->empty()) {
4279 // VPlan native path. TODO: Unify both code paths.
4280 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4281 }
4282 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4283 }
4285 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4286 }
4287
4288 /// Return the type of the canonical IV for loop regions.
4289 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4290 const Type *getCanonicalIVType() const {
4291 return getCanonicalIV()->getScalarType();
4292 }
4293};
4294
4296 return getParent()->getParent();
4297}
4298
4300 return getParent()->getParent();
4301}
4302
4303 /// VPlan models a candidate for vectorization, encoding various decisions taken
4304/// to produce efficient output IR, including which branches, basic-blocks and
4305/// output IR instructions to generate, and their cost. VPlan holds a
4306/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4307/// VPBasicBlock.
4308class VPlan {
4309 friend class VPlanPrinter;
4310 friend class VPSlotTracker;
4311
4312 /// VPBasicBlock corresponding to the original preheader. Used to place
4313 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4314 /// rest of VPlan execution.
4315 /// When this VPlan is used for the epilogue vector loop, the entry will be
4316 /// replaced by a new entry block created during skeleton creation.
4317 VPBasicBlock *Entry;
4318
4319 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4320 VPIRBasicBlock *ScalarHeader;
4321
4322 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4323 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4324 /// e.g. if the scalar epilogue always executes.
4326
4327 /// Holds the VFs applicable to this VPlan.
4329
4330 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4331 /// any UF.
4333
4334 /// Holds the name of the VPlan, for printing.
4335 std::string Name;
4336
4337 /// Represents the trip count of the original loop, for folding
4338 /// the tail.
4339 VPValue *TripCount = nullptr;
4340
4341 /// Represents the backedge taken count of the original loop, for folding
4342 /// the tail. It equals TripCount - 1.
4343 VPValue *BackedgeTakenCount = nullptr;
4344
4345 /// Represents the vector trip count.
4346 VPValue VectorTripCount;
4347
4348 /// Represents the vectorization factor of the loop.
4349 VPValue VF;
4350
4351 /// Represents the loop-invariant VF * UF of the vector loop region.
4352 VPValue VFxUF;
4353
4354 /// Holds a mapping between Values and their corresponding VPValue inside
4355 /// VPlan.
4356 Value2VPValueTy Value2VPValue;
4357
4358 /// Contains all the external definitions created for this VPlan. External
4359 /// definitions are VPValues that hold a pointer to their underlying IR.
4361
4362 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4363 /// VPlan is destroyed.
4364 SmallVector<VPBlockBase *> CreatedBlocks;
4365
4366 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4367 /// wrapping the original header of the scalar loop.
4368 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4369 : Entry(Entry), ScalarHeader(ScalarHeader) {
4370 Entry->setPlan(this);
4371 assert(ScalarHeader->getNumSuccessors() == 0 &&
4372 "scalar header must be a leaf node");
4373 }
4374
4375public:
4376 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4377 /// original preheader and scalar header of \p L, to be used as entry and
4378 /// scalar header blocks of the new VPlan.
4379 VPlan(Loop *L);
4380
4381 /// Construct a VPlan with a new VPBasicBlock named "preheader" as entry and a
4382 /// VPIRBasicBlock wrapping \p ScalarHeaderBB as scalar header. This
/// constructor takes no trip count and does not set one.
4383 VPlan(BasicBlock *ScalarHeaderBB) {
4384 setEntry(createVPBasicBlock("preheader"));
4385 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4386 }
4387
4389
4391 Entry = VPBB;
4392 VPBB->setPlan(this);
4393 }
4394
4395 /// Generate the IR code for this VPlan.
4396 void execute(VPTransformState *State);
4397
4398 /// Return the cost of this plan.
4400
/// Return the entry block of the plan.
4401 VPBasicBlock *getEntry() { return Entry; }
4402 const VPBasicBlock *getEntry() const { return Entry; }
4403
4404 /// Returns the preheader of the vector loop region, if one exists, or null
4405 /// otherwise.
4407 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4408 return VectorRegion
4409 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4410 : nullptr;
4411 }
4412
4413 /// Returns the VPRegionBlock of the vector loop.
4416
4417 /// Returns the 'middle' block of the plan, that is the block that selects
4418 /// whether to execute the scalar tail loop or the exit block from the loop
4419 /// latch. If there is an early exit from the vector loop, the middle block
4420 /// conceptually has the early exit block as third successor, split across 2
4421 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4422 /// tail loop or the exit block. If the scalar tail loop or exit block are
4423 /// known to always execute, the middle block may branch directly to that
4424 /// block. This function cannot be called once the vector loop region has been
4425 /// removed.
4427 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4428 assert(
4429 LoopRegion &&
4430 "cannot call the function after vector loop region has been removed");
4431 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4432 if (RegionSucc->getSingleSuccessor() ||
4433 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4434 return RegionSucc;
4435 // There is an early exit. The successor of RegionSucc is the middle block.
4436 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4437 }
4438
4440 return const_cast<VPlan *>(this)->getMiddleBlock();
4441 }
4442
4443 /// Return the VPBasicBlock for the preheader of the scalar loop.
4445 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4446 }
4447
4448 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4449 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4450
4451 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4452 /// the original scalar loop.
4453 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4454
4455 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4456 /// exit block.
4458
4459 /// Returns true if \p VPBB is an exit block.
4460 bool isExitBlock(VPBlockBase *VPBB);
4461
4462 /// The trip count of the original loop.
4464 assert(TripCount && "trip count needs to be set before accessing it");
4465 return TripCount;
4466 }
4467
4468 /// Set the trip count assuming it is currently null; if it is not - use
4469 /// resetTripCount().
4470 void setTripCount(VPValue *NewTripCount) {
4471 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4472 TripCount = NewTripCount;
4473 }
4474
4475 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4476 /// the original trip count have been replaced.
4477 void resetTripCount(VPValue *NewTripCount) {
4478 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4479 "TripCount must be set when resetting");
4480 TripCount = NewTripCount;
4481 }
4482
4483 /// The backedge taken count of the original loop.
4485 if (!BackedgeTakenCount)
4486 BackedgeTakenCount = new VPValue();
4487 return BackedgeTakenCount;
4488 }
4489 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4490
4491 /// The vector trip count.
4492 VPValue &getVectorTripCount() { return VectorTripCount; }
4493
4494 /// Returns the VF of the vector loop region, as a reference to the VPValue
/// member held by this plan.
4495 VPValue &getVF() { return VF; }
4496 const VPValue &getVF() const { return VF; }
4497
4498 /// Returns VF * UF of the vector loop region.
4499 VPValue &getVFxUF() { return VFxUF; }
4500
4503 }
4504
/// Add \p VF to the set of VFs applicable to this plan.
4505 void addVF(ElementCount VF) { VFs.insert(VF); }
4506
4508 assert(hasVF(VF) && "Cannot set VF not already in plan");
4509 VFs.clear();
4510 VFs.insert(VF);
4511 }
4512
/// Returns true if \p VF is one of the VFs of this plan.
4513 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
/// Returns true if at least one VF of this plan is scalable.
4514 bool hasScalableVF() const {
4515 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4516 }
4517
4518 /// Returns an iterator range over all VFs of the plan.
4521 return VFs;
4522 }
4523
/// Returns true if the plan has exactly one VF and that VF is scalar. Asserts
/// that this is the case exactly when the fixed VF of 1 is part of the plan,
/// i.e. a scalar VF is never combined with other VFs.
4524 bool hasScalarVFOnly() const {
4525 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4526 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4527 "Plan with scalar VF should only have a single VF");
4528 return HasScalarVFOnly;
4529 }
4530
/// Returns true if \p UF is applicable to this plan. An empty set of UFs
/// means the plan is valid for any UF.
4531 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4532
/// Return the UF of the plan. Asserts that the plan holds exactly one UF.
4533 unsigned getUF() const {
4534 assert(UFs.size() == 1 && "Expected a single UF");
4535 return UFs[0];
4536 }
4537
/// Make \p UF the only UF of the plan. \p UF must already be applicable to the
/// plan, see hasUF().
4538 void setUF(unsigned UF) {
4539 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4540 UFs.clear();
4541 UFs.insert(UF);
4542 }
4543
4544 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4545 /// concrete UF.
4546 bool isUnrolled() const { return UFs.size() == 1; }
4547
4548 /// Return a string with the name of the plan and the applicable VFs and UFs.
4549 std::string getName() const;
4550
/// Set the name of the plan to the string form of \p newName.
4551 void setName(const Twine &newName) { Name = newName.str(); }
4552
4553 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4554 /// yet) for \p V.
4556 assert(V && "Trying to get or add the VPValue of a null Value");
4557 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4558 if (Inserted) {
4559 VPValue *VPV = new VPValue(V);
4560 VPLiveIns.push_back(VPV);
4561 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4562 It->second = VPV;
4563 }
4564
4565 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4566 return It->second;
4567 }
4568
4569 /// Return a VPValue wrapping i1 true.
4570 VPValue *getTrue() { return getConstantInt(1, 1); }
4571
4572 /// Return a VPValue wrapping i1 false.
4573 VPValue *getFalse() { return getConstantInt(1, 0); }
4574
4575 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4576 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4577 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4578 }
4579
4580 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4582 bool IsSigned = false) {
4583 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4584 }
4585
4586 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4588 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4589 }
4590
4591 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4592 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4593
4594 /// Return the list of live-in VPValues available in the VPlan.
4596 assert(all_of(Value2VPValue,
4597 [this](const auto &P) {
4598 return is_contained(VPLiveIns, P.second);
4599 }) &&
4600 "all VPValues in Value2VPValue must also be in VPLiveIns");
4601 return VPLiveIns;
4602 }
4603
4604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4605 /// Print the live-ins of this VPlan to \p O.
4606 void printLiveIns(raw_ostream &O) const;
4607
4608 /// Print this VPlan to \p O.
4609 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4610
4611 /// Print this VPlan in DOT format to \p O.
4612 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4613
4614 /// Dump the plan to stderr (for debugging).
4615 LLVM_DUMP_METHOD void dump() const;
4616#endif
4617
4618 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4619 /// recipes to refer to the clones, and return it.
4621
4622 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4623 /// present. The returned block is owned by the VPlan and deleted once the
4624 /// VPlan is destroyed.
4626 VPRecipeBase *Recipe = nullptr) {
4627 auto *VPB = new VPBasicBlock(Name, Recipe);
4628 CreatedBlocks.push_back(VPB);
4629 return VPB;
4630 }
4631
4632 /// Create a new loop region with \p Name and entry and exiting blocks set
4633 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4634 /// owned by the VPlan and deleted once the VPlan is destroyed.
4635 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4636 VPBlockBase *Entry = nullptr,
4637 VPBlockBase *Exiting = nullptr) {
// \p Exiting is only forwarded when \p Entry is non-null; without an entry
// the region is created with neither block set.
4638 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4639 : new VPRegionBlock(Name);
// Record the block so the plan can delete it when it is destroyed.
4640 CreatedBlocks.push_back(VPB);
4641 return VPB;
4642 }
4643
4644 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4645 /// returned block is owned by the VPlan and deleted once the VPlan is
4646 /// destroyed.
4648 const std::string &Name = "") {
4649 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4650 CreatedBlocks.push_back(VPB);
4651 return VPB;
4652 }
4653
4654 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4655 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4656 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4658
4659 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4660 /// instructions in \p IRBB, except its terminator which is managed by the
4661 /// successors of the block in VPlan. The returned block is owned by the VPlan
4662 /// and deleted once the VPlan is destroyed.
4664
4665 /// Returns true if the VPlan is based on a loop with an early exit. That is
4666 /// the case if the VPlan has either more than one exit block or a single exit
4667 /// block with multiple predecessors (one for the exit via the latch and one
4668 /// via the other early exit).
4669 bool hasEarlyExit() const {
4670 return count_if(ExitBlocks,
4671 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4672 1 ||
4673 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4674 }
4675
4676 /// Returns true if the scalar tail may execute after the vector loop. Note
4677 /// that this relies on unneeded branches to the scalar tail loop being
4678 /// removed.
4679 bool hasScalarTail() const {
4680 return !(!getScalarPreheader()->hasPredecessors() ||
4682 }
4683};
4684
4685 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream insertion for VPlan: prints \p Plan to \p OS through VPlan::print
/// and returns \p OS. Only declared when NDEBUG is not defined or
/// LLVM_ENABLE_DUMP is defined.
4686 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4687 Plan.print(OS);
4688 return OS;
4689 }
4690 #endif
4691
4692} // end namespace llvm
4693
4694#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach)
static StringRef getName(Value *V)
const SmallVectorImpl< MachineOperand > & Cond
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:508
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:337
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3637
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3631
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3990
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4018
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4065
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4020
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4017
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4043
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4001
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4007
iterator end()
Definition VPlan.h:4027
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4025
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4019
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4078
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
~VPBasicBlock() override
Definition VPlan.h:4011
const_reverse_iterator rbegin() const
Definition VPlan.h:4031
reverse_iterator rend()
Definition VPlan.h:4032
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4005
VPRecipeBase & back()
Definition VPlan.h:4040
const VPRecipeBase & front() const
Definition VPlan.h:4037
const_iterator begin() const
Definition VPlan.h:4026
VPRecipeBase & front()
Definition VPlan.h:4038
const VPRecipeBase & back() const
Definition VPlan.h:4039
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4056
bool empty() const
Definition VPlan.h:4036
const_iterator end() const
Definition VPlan.h:4028
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4051
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4046
reverse_iterator rbegin()
Definition VPlan.h:4030
friend class VPlan
Definition VPlan.h:3991
size_t size() const
Definition VPlan.h:4035
const_reverse_iterator rend() const
Definition VPlan.h:4033
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2516
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2521
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2511
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2532
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2541
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2498
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2493
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2527
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2507
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:299
VPRegionBlock * getParent()
Definition VPlan.h:172
VPBlocksTy & getPredecessors()
Definition VPlan.h:204
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:201
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:369
void setName(const Twine &newName)
Definition VPlan.h:165
size_t getNumSuccessors() const
Definition VPlan.h:218
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:200
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:222
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:321
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:159
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:257
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the block's successor list.
Definition VPlan.h:334
size_t getNumPredecessors() const
Definition VPlan.h:219
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:290
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the block's predecessors list.
Definition VPlan.h:327
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:203
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition VPlan.h:157
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const VPRegionBlock * getParent() const
Definition VPlan.h:173
const std::string & getName() const
Definition VPlan.h:163
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:309
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:247
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:281
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:214
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:241
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:306
friend class VPBlockUtils
Definition VPlan.h:81
unsigned getVPBlockID() const
Definition VPlan.h:170
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:348
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:313
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:149
VPBlocksTy & getSuccessors()
Definition VPlan.h:198
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:270
void setParent(VPRegionBlock *P)
Definition VPlan.h:183
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:263
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:197
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3061
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3045
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3069
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3042
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3571
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3597
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3578
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3604
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3573
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3592
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3586
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3611
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:310
friend class VPValue
Definition VPlanValue.h:311
VPDef(const unsigned char SC)
Definition VPlanValue.h:390
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3779
VPValue * getStepValue() const
Definition VPlan.h:3790
Type * getScalarType() const
Definition VPlan.h:3785
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3767
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3759
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3793
VPValue * getStartValue() const
Definition VPlan.h:3789
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3751
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3687
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3668
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3674
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3680
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3663
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3546
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3551
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3537
const SCEV * getSCEV() const
Definition VPlan.h:3557
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3542
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3196
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3178
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3160
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3148
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3134
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3126
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3130
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3190
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3128
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2062
static bool classof(const VPValue *V)
Definition VPlan.h:2072
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2097
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2102
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2086
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2094
VPValue * getStartValue() const
Definition VPlan.h:2089
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2068
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2106
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2057
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1761
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1778
unsigned getOpcode() const
Definition VPlan.h:1774
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1755
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4143
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4167
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4157
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4144
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
FastMathFlagsTy FMFs
Definition VPlan.h:680
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:740
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:732
WrapFlagsTy WrapFlags
Definition VPlan.h:674
CmpInst::Predicate CmpPredicate
Definition VPlan.h:673
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:726
GEPNoWrapFlags GEPFlags
Definition VPlan.h:678
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:858
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:675
CmpInst::Predicate getPredicate() const
Definition VPlan.h:835
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:865
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:749
ExactFlagsTy ExactFlags
Definition VPlan.h:677
bool hasNoSignedWrap() const
Definition VPlan.h:884
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:895
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:735
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:738
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:743
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:723
bool isNonNeg() const
Definition VPlan.h:867
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:850
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:853
DisjointFlagsTy DisjointFlags
Definition VPlan.h:676
unsigned AllFlags
Definition VPlan.h:682
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:841
bool hasNoUnsignedWrap() const
Definition VPlan.h:873
FCmpFlagsTy FCmpFlags
Definition VPlan.h:681
NonNegFlagsTy NonNegFlags
Definition VPlan.h:679
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:759
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:795
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:746
VPIRFlags(Instruction &I)
Definition VPlan.h:688
Instruction & getInstruction() const
Definition VPlan.h:1432
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1440
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1419
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1446
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1434
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1407
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1018
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:990
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1002
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1246
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1287
static bool classof(const VPUser *R)
Definition VPlan.h:1272
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1254
Type * getResultType() const
Definition VPlan.h:1293
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1276
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1031
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1162
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1115
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1069
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1105
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1118
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1066
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1109
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1061
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1058
@ VScale
Returns the value for vscale.
Definition VPlan.h:1120
@ CanonicalIVIncrementForPart
Definition VPlan.h:1051
bool hasResult() const
Definition VPlan.h:1186
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1226
unsigned getOpcode() const
Definition VPlan.h:1170
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
friend class VPlanSlp
Definition VPlan.h:1032
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2627
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2633
static bool classof(const VPUser *U)
Definition VPlan.h:2609
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2576
Instruction * getInsertPos() const
Definition VPlan.h:2631
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2604
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2629
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2621
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2650
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2615
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2703
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2731
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2725
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2738
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2718
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2705
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2661
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2688
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2671
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2682
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2663
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
Definition VPlan.h:2859
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned ScaleFactor, Instruction *ReductionInst=nullptr)
Definition VPlan.h:2863
~VPPartialReductionRecipe() override=default
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2900
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition VPlan.h:2897
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2881
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1305
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1327
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1322
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4134
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1347
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1314
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1332
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1336
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3253
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3235
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3246
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3231
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:473
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4295
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:484
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:407
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:478
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:453
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:388
const VPBasicBlock * getParent() const
Definition VPlan.h:408
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:458
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:397
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2937
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2916
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2940
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2927
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2465
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2440
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2454
~VPReductionPHIRecipe() override=default
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2430
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2459
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition VPlan.h:2468
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2471
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2462
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition VPlan.h:2753
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2829
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2797
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2782
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2813
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2833
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2835
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2825
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2827
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2831
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2775
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2791
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition VPlan.h:2761
static bool classof(const VPUser *U)
Definition VPlan.h:2803
static bool classof(const VPValue *VPV)
Definition VPlan.h:2808
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4178
const VPBlockBase * getEntry() const
Definition VPlan.h:4214
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4289
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4246
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4231
VPBlockBase * getExiting()
Definition VPlan.h:4227
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4276
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4219
const Type * getCanonicalIVType() const
Definition VPlan.h:4290
const VPBlockBase * getExiting() const
Definition VPlan.h:4226
VPBlockBase * getEntry()
Definition VPlan.h:4215
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4284
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4239
friend class VPlan
Definition VPlan.h:4179
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4210
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2959
bool isSingleScalar() const
Definition VPlan.h:3000
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2967
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3012
bool isPredicated() const
Definition VPlan.h:3002
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2981
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3005
unsigned getOpcode() const
Definition VPlan.h:3029
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3024
VPValue * getStepValue() const
Definition VPlan.h:3856
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3850
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3821
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3842
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3833
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3814
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3859
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:530
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:536
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:595
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:540
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:598
static bool classof(const VPUser *U)
Definition VPlan.h:587
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:532
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:970
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:207
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1420
operand_range operands()
Definition VPlanValue.h:275
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:251
unsigned getNumOperands() const
Definition VPlanValue.h:245
operand_iterator op_end()
Definition VPlanValue.h:273
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:246
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:226
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:269
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:268
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
friend class VPExpressionRecipe
Definition VPlanValue.h:53
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:183
friend class VPDef
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:193
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:178
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1940
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1961
const VPValue * getVFValue() const
Definition VPlan.h:1936
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1954
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1947
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1925
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition VPlan.h:2014
Type * getSourceElementType() const
Definition VPlan.h:1991
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1993
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2000
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1981
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2017
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2007
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1695
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1702
const_operand_range args() const
Definition VPlan.h:1735
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1716
operand_range args()
Definition VPlan.h:1734
Function * getCalledScalarFunction() const
Definition VPlan.h:1730
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3723
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3710
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3705
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1545
Instruction::CastOps getOpcode() const
Definition VPlan.h:1581
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1584
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1553
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1566
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1881
Type * getSourceElementType() const
Definition VPlan.h:1886
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1889
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1873
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1859
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1896
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2183
static bool classof(const VPValue *V)
Definition VPlan.h:2137
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2153
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2168
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2161
PHINode * getPHINode() const
Definition VPlan.h:2163
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2125
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2149
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2166
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2175
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2132
static bool classof(const VPHeaderPHIRecipe *R)
Definition VPlan.h:2142
const VPValue * getVFValue() const
Definition VPlan.h:2156
const VPValue * getStepValue() const
Definition VPlan.h:2150
const TruncInst * getTruncInst() const
Definition VPlan.h:2257
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2238
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2213
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2230
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2256
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2204
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2273
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2252
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2265
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1595
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1626
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1666
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1675
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1612
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1681
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1647
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1678
~VPWidenIntrinsicRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1669
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3284
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3281
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3324
static bool classof(const VPUser *U)
Definition VPlan.h:3318
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3347
Instruction & Ingredient
Definition VPlan.h:3272
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3307
Instruction & getIngredient() const
Definition VPlan.h:3355
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3278
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3311
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3338
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3275
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3334
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3294
void setMask(VPValue *Mask)
Definition VPlan.h:3286
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3344
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3331
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3328
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2372
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2343
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2350
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2304
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2314
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2292
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1497
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1517
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1507
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:1501
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1534
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4308
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1106
friend class VPSlotTracker
Definition VPlan.h:4310
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1082
bool hasVF(ElementCount VF) const
Definition VPlan.h:4513
LLVMContext & getContext() const
Definition VPlan.h:4501
VPBasicBlock * getEntry()
Definition VPlan.h:4401
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4492
void setName(const Twine &newName)
Definition VPlan.h:4551
bool hasScalableVF() const
Definition VPlan.h:4514
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4499
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4495
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4463
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4570
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4484
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4520
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:890
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:868
const VPValue & getVF() const
Definition VPlan.h:4496
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:898
const VPBasicBlock * getEntry() const
Definition VPlan.h:4402
friend class VPlanPrinter
Definition VPlan.h:4309
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4587
unsigned getUF() const
Definition VPlan.h:4533
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4647
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1220
bool hasUF(unsigned UF) const
Definition VPlan.h:4531
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4453
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4576
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4489
void setVF(ElementCount VF)
Definition VPlan.h:4507
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4546
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1011
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4669
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:993
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4439
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4470
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4477
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4426
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4390
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4625
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1226
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4573
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4555
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4635
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1112
bool hasScalarVFOnly() const
Definition VPlan.h:4524
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4444
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:905
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4595
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1065
void addVF(ElementCount VF)
Definition VPlan.h:4505
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4449
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4592
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4581
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1027
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4406
void setUF(unsigned UF)
Definition VPlan.h:4538
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4679
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1153
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4383
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2425
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3922
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iterated ranges have the same length.
Definition STLExtras.h:839
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
DenseMap< Value *, VPValue * > Value2VPValueTy
Definition VPlanValue.h:199
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3955
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3969
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3974
static bool isPossible(SrcTy R)
Definition VPlan.h:3956
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3884
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3905
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3886
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3889
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3876
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2384
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition VPlan.h:2379
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2396
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1478
VPIRPhi(PHINode &PN)
Definition VPlan.h:1471
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1473
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1489
static bool classof(const VPUser *U)
Definition VPlan.h:1365
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1380
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1395
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1362
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1375
static bool classof(const VPValue *V)
Definition VPlan.h:1370
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:923
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:929
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:924
static bool classof(const VPValue *V)
Definition VPlan.h:949
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:956
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:944
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3402
void execute(VPTransformState &State) override
Generate the wide load or gather.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3415
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3403
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3425
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3361
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3383
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3362
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3371
A recipe for widening select instructions.
Definition VPlan.h:1794
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1805
VPWidenSelectRecipe(SelectInst *SI, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL={})
Definition VPlan.h:1795
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1826
VPValue * getCond() const
Definition VPlan.h:1821
unsigned getOpcode() const
Definition VPlan.h:1819
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3486
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3498
void execute(VPTransformState &State) override
Generate the wide store or scatter.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3511
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3487
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3501
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3443
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3461
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3452
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3467
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3444