LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPlan to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
508// Helper macro to define common classof implementations for recipes.
509#define VP_CLASSOF_IMPL(VPDefID) \
510 static inline bool classof(const VPDef *D) { \
511 return D->getVPDefID() == VPDefID; \
512 } \
513 static inline bool classof(const VPValue *V) { \
514 auto *R = V->getDefiningRecipe(); \
515 return R && R->getVPDefID() == VPDefID; \
516 } \
517 static inline bool classof(const VPUser *U) { \
518 auto *R = dyn_cast<VPRecipeBase>(U); \
519 return R && R->getVPDefID() == VPDefID; \
520 } \
521 static inline bool classof(const VPRecipeBase *R) { \
522 return R->getVPDefID() == VPDefID; \
523 } \
524 static inline bool classof(const VPSingleDefRecipe *R) { \
525 return R->getVPDefID() == VPDefID; \
526 }
527
528/// VPSingleDef is a base class for recipes for modeling a sequence of one or
529/// more output IR that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
531class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPWidenSelectSC:
561 case VPRecipeBase::VPBlendSC:
562 case VPRecipeBase::VPPredInstPHISC:
563 case VPRecipeBase::VPCanonicalIVPHISC:
564 case VPRecipeBase::VPActiveLaneMaskPHISC:
565 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
566 case VPRecipeBase::VPWidenPHISC:
567 case VPRecipeBase::VPWidenIntOrFpInductionSC:
568 case VPRecipeBase::VPWidenPointerInductionSC:
569 case VPRecipeBase::VPReductionPHISC:
570 return true;
571 case VPRecipeBase::VPBranchOnMaskSC:
572 case VPRecipeBase::VPInterleaveEVLSC:
573 case VPRecipeBase::VPInterleaveSC:
574 case VPRecipeBase::VPIRInstructionSC:
575 case VPRecipeBase::VPWidenLoadEVLSC:
576 case VPRecipeBase::VPWidenLoadSC:
577 case VPRecipeBase::VPWidenStoreEVLSC:
578 case VPRecipeBase::VPWidenStoreSC:
579 case VPRecipeBase::VPHistogramSC:
580 // TODO: Widened stores don't define a value, but widened loads do. Split
581 // the recipes to be able to make widened loads VPSingleDefRecipes.
582 return false;
583 }
584 llvm_unreachable("Unhandled VPDefID");
585 }
586
587 static inline bool classof(const VPUser *U) {
588 auto *R = dyn_cast<VPRecipeBase>(U);
589 return R && classof(R);
590 }
591
592 VPSingleDefRecipe *clone() override = 0;
593
594 /// Returns the underlying instruction.
601
602#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
603 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
605#endif
606};
607
608/// Class to record and manage LLVM IR flags.
610 enum class OperationType : unsigned char {
611 Cmp,
612 FCmp,
613 OverflowingBinOp,
614 Trunc,
615 DisjointOp,
616 PossiblyExactOp,
617 GEPOp,
618 FPMathOp,
619 NonNegOp,
620 Other
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
649 struct ExactFlagsTy {
650 char IsExact : 1;
651 };
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669
670 OperationType OpType;
671
672 union {
677 ExactFlagsTy ExactFlags;
680 FastMathFlagsTy FMFs;
681 FCmpFlagsTy FCmpFlags;
682 unsigned AllFlags;
683 };
684
685public:
686 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
687
689 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
690 OpType = OperationType::FCmp;
691 FCmpFlags.Pred = FCmp->getPredicate();
692 FCmpFlags.FMFs = FCmp->getFastMathFlags();
693 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
694 OpType = OperationType::Cmp;
695 CmpPredicate = Op->getPredicate();
696 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
697 OpType = OperationType::DisjointOp;
698 DisjointFlags.IsDisjoint = Op->isDisjoint();
699 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
700 OpType = OperationType::OverflowingBinOp;
701 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
702 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
703 OpType = OperationType::Trunc;
704 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
705 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
706 OpType = OperationType::PossiblyExactOp;
707 ExactFlags.IsExact = Op->isExact();
708 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
709 OpType = OperationType::GEPOp;
710 GEPFlags = GEP->getNoWrapFlags();
711 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
712 OpType = OperationType::NonNegOp;
713 NonNegFlags.NonNeg = PNNI->hasNonNeg();
714 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
715 OpType = OperationType::FPMathOp;
716 FMFs = Op->getFastMathFlags();
717 } else {
718 OpType = OperationType::Other;
719 AllFlags = 0;
720 }
721 }
722
724 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
725
727 : OpType(OperationType::FCmp) {
728 FCmpFlags.Pred = Pred;
729 FCmpFlags.FMFs = FMFs;
730 }
731
733 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
734
736 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
737
738 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
739
741 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
742
744 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
745
747 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
748
750 OpType = Other.OpType;
751 AllFlags = Other.AllFlags;
752 }
753
754 /// Only keep flags also present in \p Other. \p Other must have the same
755 /// OpType as the current object.
756 void intersectFlags(const VPIRFlags &Other);
757
758 /// Drop all poison-generating flags.
760 // NOTE: This needs to be kept in-sync with
761 // Instruction::dropPoisonGeneratingFlags.
762 switch (OpType) {
763 case OperationType::OverflowingBinOp:
764 WrapFlags.HasNUW = false;
765 WrapFlags.HasNSW = false;
766 break;
767 case OperationType::Trunc:
768 TruncFlags.HasNUW = false;
769 TruncFlags.HasNSW = false;
770 break;
771 case OperationType::DisjointOp:
772 DisjointFlags.IsDisjoint = false;
773 break;
774 case OperationType::PossiblyExactOp:
775 ExactFlags.IsExact = false;
776 break;
777 case OperationType::GEPOp:
779 break;
780 case OperationType::FPMathOp:
781 case OperationType::FCmp:
782 getFMFsRef().NoNaNs = false;
783 getFMFsRef().NoInfs = false;
784 break;
785 case OperationType::NonNegOp:
786 NonNegFlags.NonNeg = false;
787 break;
788 case OperationType::Cmp:
789 case OperationType::Other:
790 break;
791 }
792 }
793
794 /// Apply the IR flags to \p I.
795 void applyFlags(Instruction &I) const {
796 switch (OpType) {
797 case OperationType::OverflowingBinOp:
798 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
799 I.setHasNoSignedWrap(WrapFlags.HasNSW);
800 break;
801 case OperationType::Trunc:
802 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
803 I.setHasNoSignedWrap(TruncFlags.HasNSW);
804 break;
805 case OperationType::DisjointOp:
806 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
807 break;
808 case OperationType::PossiblyExactOp:
809 I.setIsExact(ExactFlags.IsExact);
810 break;
811 case OperationType::GEPOp:
812 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
813 break;
814 case OperationType::FPMathOp:
815 case OperationType::FCmp: {
816 const FastMathFlagsTy &F = getFMFsRef();
817 I.setHasAllowReassoc(F.AllowReassoc);
818 I.setHasNoNaNs(F.NoNaNs);
819 I.setHasNoInfs(F.NoInfs);
820 I.setHasNoSignedZeros(F.NoSignedZeros);
821 I.setHasAllowReciprocal(F.AllowReciprocal);
822 I.setHasAllowContract(F.AllowContract);
823 I.setHasApproxFunc(F.ApproxFunc);
824 break;
825 }
826 case OperationType::NonNegOp:
827 I.setNonNeg(NonNegFlags.NonNeg);
828 break;
829 case OperationType::Cmp:
830 case OperationType::Other:
831 break;
832 }
833 }
834
836 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
837 "recipe doesn't have a compare predicate");
838 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
839 }
840
842 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
843 "recipe doesn't have a compare predicate");
844 if (OpType == OperationType::FCmp)
845 FCmpFlags.Pred = Pred;
846 else
847 CmpPredicate = Pred;
848 }
849
851
852 /// Returns true if the recipe has a comparison predicate.
853 bool hasPredicate() const {
854 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
855 }
856
857 /// Returns true if the recipe has fast-math flags.
858 bool hasFastMathFlags() const {
859 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
860 }
861
863
864 /// Returns true if the recipe has non-negative flag.
865 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
866
867 bool isNonNeg() const {
868 assert(OpType == OperationType::NonNegOp &&
869 "recipe doesn't have a NNEG flag");
870 return NonNegFlags.NonNeg;
871 }
872
873 bool hasNoUnsignedWrap() const {
874 switch (OpType) {
875 case OperationType::OverflowingBinOp:
876 return WrapFlags.HasNUW;
877 case OperationType::Trunc:
878 return TruncFlags.HasNUW;
879 default:
880 llvm_unreachable("recipe doesn't have a NUW flag");
881 }
882 }
883
884 bool hasNoSignedWrap() const {
885 switch (OpType) {
886 case OperationType::OverflowingBinOp:
887 return WrapFlags.HasNSW;
888 case OperationType::Trunc:
889 return TruncFlags.HasNSW;
890 default:
891 llvm_unreachable("recipe doesn't have a NSW flag");
892 }
893 }
894
895 bool isDisjoint() const {
896 assert(OpType == OperationType::DisjointOp &&
897 "recipe cannot have a disjoing flag");
898 return DisjointFlags.IsDisjoint;
899 }
900
901private:
902 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
903 FastMathFlagsTy &getFMFsRef() {
904 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
905 }
906 const FastMathFlagsTy &getFMFsRef() const {
907 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
908 }
909
910public:
911#if !defined(NDEBUG)
912 /// Returns true if the set flags are valid for \p Opcode.
913 bool flagsValidForOpcode(unsigned Opcode) const;
914#endif
915
916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
917 void printFlags(raw_ostream &O) const;
918#endif
919};
920
921/// A pure-virtual common base class for recipes defining a single VPValue and
922/// using IR flags.
924 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
925 const VPIRFlags &Flags,
927 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
928
929 static inline bool classof(const VPRecipeBase *R) {
930 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
935 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
936 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
937 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
938 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
939 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
940 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
941 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
942 }
943
944 static inline bool classof(const VPUser *U) {
945 auto *R = dyn_cast<VPRecipeBase>(U);
946 return R && classof(R);
947 }
948
949 static inline bool classof(const VPValue *V) {
950 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
951 return R && classof(R);
952 }
953
954 VPRecipeWithIRFlags *clone() override = 0;
955
956 static inline bool classof(const VPSingleDefRecipe *U) {
957 auto *R = dyn_cast<VPRecipeBase>(U);
958 return R && classof(R);
959 }
960
961 void execute(VPTransformState &State) override = 0;
962
963 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
965 VPCostContext &Ctx) const;
966};
967
968/// Helper to access the operand that contains the unroll part for this recipe
969/// after unrolling.
970template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
971protected:
972 /// Return the VPValue operand containing the unroll part or null if there is
973 /// no such operand.
974 VPValue *getUnrollPartOperand(const VPUser &U) const;
975
976 /// Return the unroll part.
977 unsigned getUnrollPart(const VPUser &U) const;
978};
979
980/// Helper to manage IR metadata for recipes. It filters out metadata that
981/// cannot be propagated.
984
985public:
986 VPIRMetadata() = default;
987
/// Adds metadata that can be preserved from the original instruction
/// \p I.
991
992 /// Copy constructor for cloning.
993 VPIRMetadata(const VPIRMetadata &Other) = default;
994
996
997 /// Add all metadata to \p I.
998 void applyMetadata(Instruction &I) const;
999
1000 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1001 /// already exists, it will be replaced. Otherwise, it will be added.
1002 void setMetadata(unsigned Kind, MDNode *Node) {
1003 auto It =
1004 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1005 return P.first == Kind;
1006 });
1007 if (It != Metadata.end())
1008 It->second = Node;
1009 else
1010 Metadata.emplace_back(Kind, Node);
1011 }
1012
/// Intersect this VPIRMetadata object with \p MD, keeping only metadata
/// nodes that are common to both.
1015 void intersect(const VPIRMetadata &MD);
1016
1017 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1018 MDNode *getMetadata(unsigned Kind) const {
1019 auto It =
1020 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1021 return It != Metadata.end() ? It->second : nullptr;
1022 }
1023};
1024
1025/// This is a concrete Recipe that models a single VPlan-level instruction.
1026/// While as any Recipe it may generate a sequence of IR instructions when
1027/// executed, these instructions would always form a single-def expression as
1028/// the VPInstruction is also a single def-use vertex.
1030 public VPIRMetadata,
1031 public VPUnrollPartAccessor<1> {
1032 friend class VPlanSlp;
1033
1034public:
/// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1036 enum {
1038 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1039 // values of a first-order recurrence.
1043 // Creates a mask where each lane is active (true) whilst the current
1044 // counter (first operand + index) is less than the second operand. i.e.
// mask[i] = icmp ult (op0 + i), op1
1046 // The size of the mask returned is VF * Multiplier (UF, third op).
1050 // Increment the canonical IV separately for each unrolled part.
1055 /// Given operands of (the same) struct type, creates a struct of fixed-
1056 /// width vectors each containing a struct field of all operands. The
1057 /// number of operands matches the element count of every vector.
1059 /// Creates a fixed-width vector containing all operands. The number of
1060 /// operands matches the vector element count.
1062 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1063 /// abstract VPInstruction whose single defined VPValue represents VF
1064 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1065 /// VPInstructions.
/// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
/// where one of (x,y) is loop invariant, and both x and y are integer type.
1072 // Extracts the last lane from its operand if it is a vector, or the last
1073 // part if scalar. In the latter case, the recipe will be removed during
1074 // unrolling.
1076 // Extracts the last lane for each part from its operand.
1078 // Extracts the second-to-last lane from its operand or the second-to-last
1079 // part if it is scalar. In the latter case, the recipe will be removed
1080 // during unrolling.
1082 LogicalAnd, // Non-poison propagating logical And.
1083 // Add an offset in bytes (second operand) to a base pointer (first
1084 // operand). Only generates scalar values (either for the first lane only or
1085 // for all lanes, depending on its uses).
1087 // Add a vector offset in bytes (second operand) to a scalar base pointer
1088 // (first operand).
1090 // Returns a scalar boolean value, which is true if any lane of its
1091 // (boolean) vector operands is true. It produces the reduced value across
1092 // all unrolled iterations. Unrolling will add all copies of its original
1093 // operand as additional operands. AnyOf is poison-safe as all operands
1094 // will be frozen.
1096 // Calculates the first active lane index of the vector predicate operands.
1097 // It produces the lane index across all unrolled iterations. Unrolling will
1098 // add all copies of its original operand as additional operands.
1099 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1100 // result even with operands that are all zeroes.
1102 // Calculates the last active lane index of the vector predicate operands.
1103 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1104 // tail-folding to extract the correct live-out value from the last active
1105 // iteration. It produces the lane index across all unrolled iterations.
1106 // Unrolling will add all copies of its original operand as additional
1107 // operands.
1109
1110 // The opcodes below are used for VPInstructionWithType.
1111 //
1112 /// Scale the first operand (vector step) by the second operand
1113 /// (scalar-step). Casts both operands to the result type if needed.
1115 /// Start vector for reductions with 3 operands: the original start value,
1116 /// the identity value for the reduction and an integer indicating the
1117 /// scaling factor.
1119 // Creates a step vector starting from 0 to VF with a step of 1.
1121 /// Extracts a single lane (first operand) from a set of vector operands.
1122 /// The lane specifies an index into a vector formed by combining all vector
1123 /// operands (all operands after the first one).
1125 /// Explicit user for the resume phi of the canonical induction in the main
1126 /// VPlan, used by the epilogue vector loop.
1128 /// Returns the value for vscale.
1131 };
1132
1133 /// Returns true if this VPInstruction generates scalar values for all lanes.
1134 /// Most VPInstructions generate a single value per part, either vector or
1135 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1136 /// values per all lanes, stemming from an original ingredient. This method
1137 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1138 /// underlying ingredient.
1139 bool doesGeneratePerAllLanes() const;
1140
1141private:
1142 typedef unsigned char OpcodeTy;
1143 OpcodeTy Opcode;
1144
1145 /// An optional name that can be used for the generated IR instruction.
1146 std::string Name;
1147
1148 /// Returns true if we can generate a scalar for the first lane only if
1149 /// needed.
1150 bool canGenerateScalarForFirstLane() const;
1151
/// Utility methods serving execute(): generates a single vector instance of
/// the modeled instruction. \returns the generated value. In some cases an
/// existing value is returned rather than a generated one.
1155 Value *generate(VPTransformState &State);
1156
1157#if !defined(NDEBUG)
1158 /// Return the number of operands determined by the opcode of the
1159 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1160 /// directly by the opcode.
1161 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1162#endif
1163
1164public:
1165 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1166 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1167 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1168
1169 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1170
1171 VPInstruction *clone() override {
1172 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1173 getDebugLoc(), Name);
1174 if (getUnderlyingValue())
1175 New->setUnderlyingValue(getUnderlyingInstr());
1176 return New;
1177 }
1178
1179 unsigned getOpcode() const { return Opcode; }
1180
1181 /// Generate the instruction.
1182 /// TODO: We currently execute only per-part unless a specific instance is
1183 /// provided.
1184 void execute(VPTransformState &State) override;
1185
1186 /// Return the cost of this VPInstruction.
1187 InstructionCost computeCost(ElementCount VF,
1188 VPCostContext &Ctx) const override;
1189
1190#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1191 /// Print the VPInstruction to dbgs() (for debugging).
1192 LLVM_DUMP_METHOD void dump() const;
1193#endif
1194
1195 bool hasResult() const {
1196 // CallInst may or may not have a result, depending on the called function.
1197 // Conservatively return calls have results for now.
1198 switch (getOpcode()) {
1199 case Instruction::Ret:
1200 case Instruction::Br:
1201 case Instruction::Store:
1202 case Instruction::Switch:
1203 case Instruction::IndirectBr:
1204 case Instruction::Resume:
1205 case Instruction::CatchRet:
1206 case Instruction::Unreachable:
1207 case Instruction::Fence:
1208 case Instruction::AtomicRMW:
1211 return false;
1212 default:
1213 return true;
1214 }
1215 }
1216
1217 /// Returns true if the underlying opcode may read from or write to memory.
1218 bool opcodeMayReadOrWriteFromMemory() const;
1219
1220 /// Returns true if the recipe only uses the first lane of operand \p Op.
1221 bool usesFirstLaneOnly(const VPValue *Op) const override;
1222
1223 /// Returns true if the recipe only uses the first part of operand \p Op.
1224 bool usesFirstPartOnly(const VPValue *Op) const override;
1225
1226 /// Returns true if this VPInstruction produces a scalar value from a vector,
1227 /// e.g. by performing a reduction or extracting a lane.
1228 bool isVectorToScalar() const;
1229
1230 /// Returns true if this VPInstruction's operands are single scalars and the
1231 /// result is also a single scalar.
1232 bool isSingleScalar() const;
1233
1234 /// Returns the symbolic name assigned to the VPInstruction.
1235 StringRef getName() const { return Name; }
1236
1237 /// Set the symbolic name for the VPInstruction.
1238 void setName(StringRef NewName) { Name = NewName.str(); }
1239
1240protected:
1241#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1242 /// Print the VPInstruction to \p O.
1243 void printRecipe(raw_ostream &O, const Twine &Indent,
1244 VPSlotTracker &SlotTracker) const override;
1245#endif
1246};
1247
1248/// A specialization of VPInstruction augmenting it with a dedicated result
1249/// type, to be used when the opcode and operands of the VPInstruction don't
1250/// directly determine the result type. Note that there is no separate VPDef ID
1251/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1252/// distinguished purely by the opcode.
1254 /// Scalar result type produced by the recipe.
1255 Type *ResultTy;
1256
1257public:
1259 Type *ResultTy, const VPIRFlags &Flags = {},
1260 const VPIRMetadata &Metadata = {},
1262 const Twine &Name = "")
1263 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1264 ResultTy(ResultTy) {}
1265
1266 static inline bool classof(const VPRecipeBase *R) {
1267 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1268 // type information.
1269 if (R->isScalarCast())
1270 return true;
1271 auto *VPI = dyn_cast<VPInstruction>(R);
1272 if (!VPI)
1273 return false;
1274 switch (VPI->getOpcode()) {
1278 return true;
1279 default:
1280 return false;
1281 }
1282 }
1283
1284 static inline bool classof(const VPUser *R) {
1286 }
1287
1288 VPInstruction *clone() override {
1289 auto *New =
1291 *this, *this, getDebugLoc(), getName());
1292 New->setUnderlyingValue(getUnderlyingValue());
1293 return New;
1294 }
1295
1296 void execute(VPTransformState &State) override;
1297
/// Return the cost of this VPInstructionWithType.
1300 VPCostContext &Ctx) const override {
1301 // TODO: Compute accurate cost after retiring the legacy cost model.
1302 return 0;
1303 }
1304
1305 Type *getResultType() const { return ResultTy; }
1306
1307protected:
1308#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1309 /// Print the recipe.
1310 void printRecipe(raw_ostream &O, const Twine &Indent,
1311 VPSlotTracker &SlotTracker) const override;
1312#endif
1313};
1314
1315/// Helper type to provide functions to access incoming values and blocks for
1316/// phi-like recipes.
1318protected:
1319 /// Return a VPRecipeBase* to the current object.
1320 virtual const VPRecipeBase *getAsRecipe() const = 0;
1321
1322public:
1323 virtual ~VPPhiAccessors() = default;
1324
1325 /// Returns the incoming VPValue with index \p Idx.
1326 VPValue *getIncomingValue(unsigned Idx) const {
1327 return getAsRecipe()->getOperand(Idx);
1328 }
1329
1330 /// Returns the incoming block with index \p Idx.
1331 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1332
1333 /// Returns the number of incoming values, also number of incoming blocks.
1334 virtual unsigned getNumIncoming() const {
1335 return getAsRecipe()->getNumOperands();
1336 }
1337
/// Returns an iterator range over the incoming values.
1340 return make_range(getAsRecipe()->op_begin(),
1341 getAsRecipe()->op_begin() + getNumIncoming());
1342 }
1343
1345 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1346
1347 /// Returns an iterator range over the incoming blocks.
1349 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1350 return getIncomingBlock(Idx);
1351 };
1352 return map_range(index_range(0, getNumIncoming()), GetBlock);
1353 }
1354
1355 /// Returns an iterator range over pairs of incoming values and corresponding
1356 /// incoming blocks.
1362
1363 /// Removes the incoming value for \p IncomingBlock, which must be a
1364 /// predecessor.
1365 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1366
1367#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1368 /// Print the recipe.
1370#endif
1371};
1372
1374 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1375 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1376
1377 static inline bool classof(const VPUser *U) {
1378 auto *VPI = dyn_cast<VPInstruction>(U);
1379 return VPI && VPI->getOpcode() == Instruction::PHI;
1380 }
1381
1382 static inline bool classof(const VPValue *V) {
1383 auto *VPI = dyn_cast<VPInstruction>(V);
1384 return VPI && VPI->getOpcode() == Instruction::PHI;
1385 }
1386
1387 static inline bool classof(const VPSingleDefRecipe *SDR) {
1388 auto *VPI = dyn_cast<VPInstruction>(SDR);
1389 return VPI && VPI->getOpcode() == Instruction::PHI;
1390 }
1391
1392 VPPhi *clone() override {
1393 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1394 PhiR->setUnderlyingValue(getUnderlyingValue());
1395 return PhiR;
1396 }
1397
1398 void execute(VPTransformState &State) override;
1399
1400protected:
1401#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1402 /// Print the recipe.
1403 void printRecipe(raw_ostream &O, const Twine &Indent,
1404 VPSlotTracker &SlotTracker) const override;
1405#endif
1406
1407 const VPRecipeBase *getAsRecipe() const override { return this; }
1408};
1409
/// A recipe to wrap an original IR instruction not to be modified during
/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
/// Except for PHIs, VPIRInstructions cannot have any operands.
1414 Instruction &I;
1415
1416protected:
1417 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1418 /// subclasses may need to be created, e.g. VPIRPhi.
1420 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1421
1422public:
1423 ~VPIRInstruction() override = default;
1424
/// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
/// VPIRInstruction.
1428
1429 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1430
1432 auto *R = create(I);
1433 for (auto *Op : operands())
1434 R->addOperand(Op);
1435 return R;
1436 }
1437
1438 void execute(VPTransformState &State) override;
1439
1440 /// Return the cost of this VPIRInstruction.
1442 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1443
1444 Instruction &getInstruction() const { return I; }
1445
1446 bool usesScalars(const VPValue *Op) const override {
1448 "Op must be an operand of the recipe");
1449 return true;
1450 }
1451
1452 bool usesFirstPartOnly(const VPValue *Op) const override {
1454 "Op must be an operand of the recipe");
1455 return true;
1456 }
1457
1458 bool usesFirstLaneOnly(const VPValue *Op) const override {
1460 "Op must be an operand of the recipe");
1461 return true;
1462 }
1463
/// Update the recipe's first operand to the last lane of the operand using
/// \p Builder. Must only be used for VPIRInstructions with at least one
/// operand wrapping a PHINode.
1468
1469protected:
1470#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1471 /// Print the recipe.
1472 void printRecipe(raw_ostream &O, const Twine &Indent,
1473 VPSlotTracker &SlotTracker) const override;
1474#endif
1475};
1476
/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
/// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand
/// is allowed, and it is used to add a new incoming value for the single
/// predecessor VPBB.
1482 public VPPhiAccessors {
1484
1485 static inline bool classof(const VPRecipeBase *U) {
1486 auto *R = dyn_cast<VPIRInstruction>(U);
1487 return R && isa<PHINode>(R->getInstruction());
1488 }
1489
1491
1492 void execute(VPTransformState &State) override;
1493
1494protected:
1495#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1496 /// Print the recipe.
1497 void printRecipe(raw_ostream &O, const Twine &Indent,
1498 VPSlotTracker &SlotTracker) const override;
1499#endif
1500
1501 const VPRecipeBase *getAsRecipe() const override { return this; }
1502};
1503
1504/// VPWidenRecipe is a recipe for producing a widened instruction using the
1505/// opcode and operands of the recipe. This recipe covers most of the
1506/// traditional vectorization cases where each recipe transforms into a
1507/// vectorized version of itself.
1509 public VPIRMetadata {
1510 unsigned Opcode;
1511
1512public:
1513 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1514 const VPIRFlags &Flags, const VPIRMetadata &Metadata,
1515 DebugLoc DL)
1516 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1517 VPIRMetadata(Metadata), Opcode(Opcode) {}
1518
1520 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1521 DebugLoc DL = {})
1522 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1523 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1524 setUnderlyingValue(&I);
1525 }
1526
1527 ~VPWidenRecipe() override = default;
1528
1529 VPWidenRecipe *clone() override {
1530 auto *R =
1531 new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
1532 R->setUnderlyingValue(getUnderlyingValue());
1533 return R;
1534 }
1535
1536 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1537
1538 /// Produce a widened instruction using the opcode and operands of the recipe,
1539 /// processing State.VF elements.
1540 void execute(VPTransformState &State) override;
1541
1542 /// Return the cost of this VPWidenRecipe.
1543 InstructionCost computeCost(ElementCount VF,
1544 VPCostContext &Ctx) const override;
1545
1546 unsigned getOpcode() const { return Opcode; }
1547
1548protected:
1549#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1550 /// Print the recipe.
1551 void printRecipe(raw_ostream &O, const Twine &Indent,
1552 VPSlotTracker &SlotTracker) const override;
1553#endif
1554};
1555
1556/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1558 /// Cast instruction opcode.
1559 Instruction::CastOps Opcode;
1560
1561 /// Result type for the cast.
1562 Type *ResultTy;
1563
1564public:
1566 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1567 const VPIRMetadata &Metadata = {},
1569 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1570 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1571 assert(flagsValidForOpcode(Opcode) &&
1572 "Set flags not supported for the provided opcode");
1574 }
1575
1576 ~VPWidenCastRecipe() override = default;
1577
1579 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1581 *this, *this, getDebugLoc());
1582 }
1583
1584 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1585
1586 /// Produce widened copies of the cast.
1587 void execute(VPTransformState &State) override;
1588
1589 /// Return the cost of this VPWidenCastRecipe.
1591 VPCostContext &Ctx) const override;
1592
1593 Instruction::CastOps getOpcode() const { return Opcode; }
1594
1595 /// Returns the result type of the cast.
1596 Type *getResultType() const { return ResultTy; }
1597
1598protected:
1599#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1600 /// Print the recipe.
1601 void printRecipe(raw_ostream &O, const Twine &Indent,
1602 VPSlotTracker &SlotTracker) const override;
1603#endif
1604};
1605
1606/// A recipe for widening vector intrinsics.
1608 /// ID of the vector intrinsic to widen.
1609 Intrinsic::ID VectorIntrinsicID;
1610
1611 /// Scalar return type of the intrinsic.
1612 Type *ResultTy;
1613
1614 /// True if the intrinsic may read from memory.
1615 bool MayReadFromMemory;
1616
/// True if the intrinsic may write to memory.
1618 bool MayWriteToMemory;
1619
1620 /// True if the intrinsic may have side-effects.
1621 bool MayHaveSideEffects;
1622
1623public:
1625 ArrayRef<VPValue *> CallArguments, Type *Ty,
1626 const VPIRFlags &Flags = {},
1627 const VPIRMetadata &MD = {},
1629 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1630 DL),
1631 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1632 MayReadFromMemory(CI.mayReadFromMemory()),
1633 MayWriteToMemory(CI.mayWriteToMemory()),
1634 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1635 setUnderlyingValue(&CI);
1636 }
1637
1639 ArrayRef<VPValue *> CallArguments, Type *Ty,
1640 const VPIRFlags &Flags = {},
1641 const VPIRMetadata &Metadata = {},
1643 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1644 DL),
1645 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1646 ResultTy(Ty) {
1647 LLVMContext &Ctx = Ty->getContext();
1648 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1649 MemoryEffects ME = Attrs.getMemoryEffects();
1650 MayReadFromMemory = !ME.onlyWritesMemory();
1651 MayWriteToMemory = !ME.onlyReadsMemory();
1652 MayHaveSideEffects = MayWriteToMemory ||
1653 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1654 !Attrs.hasAttribute(Attribute::WillReturn);
1655 }
1656
1657 ~VPWidenIntrinsicRecipe() override = default;
1658
1660 if (Value *CI = getUnderlyingValue())
1661 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1662 operands(), ResultTy, *this, *this,
1663 getDebugLoc());
1664 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1665 *this, *this, getDebugLoc());
1666 }
1667
1668 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1669
1670 /// Produce a widened version of the vector intrinsic.
1671 void execute(VPTransformState &State) override;
1672
1673 /// Return the cost of this vector intrinsic.
1675 VPCostContext &Ctx) const override;
1676
1677 /// Return the ID of the intrinsic.
1678 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1679
1680 /// Return the scalar return type of the intrinsic.
1681 Type *getResultType() const { return ResultTy; }
1682
/// Return the name of the intrinsic as a string.
1685
1686 /// Returns true if the intrinsic may read from memory.
1687 bool mayReadFromMemory() const { return MayReadFromMemory; }
1688
1689 /// Returns true if the intrinsic may write to memory.
1690 bool mayWriteToMemory() const { return MayWriteToMemory; }
1691
1692 /// Returns true if the intrinsic may have side-effects.
1693 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1694
1695 bool usesFirstLaneOnly(const VPValue *Op) const override;
1696
1697protected:
1698#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1699 /// Print the recipe.
1700 void printRecipe(raw_ostream &O, const Twine &Indent,
1701 VPSlotTracker &SlotTracker) const override;
1702#endif
1703};
1704
1705/// A recipe for widening Call instructions using library calls.
1707 public VPIRMetadata {
1708 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1709 /// between a given VF and the chosen vectorized variant, so there will be a
1710 /// different VPlan for each VF with a valid variant.
1711 Function *Variant;
1712
1713public:
1715 ArrayRef<VPValue *> CallArguments,
1716 const VPIRFlags &Flags = {},
1717 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1718 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1719 VPIRMetadata(Metadata), Variant(Variant) {
1720 setUnderlyingValue(UV);
1721 assert(
1722 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1723 "last operand must be the called function");
1724 }
1725
1726 ~VPWidenCallRecipe() override = default;
1727
1729 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1730 *this, *this, getDebugLoc());
1731 }
1732
1733 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1734
1735 /// Produce a widened version of the call instruction.
1736 void execute(VPTransformState &State) override;
1737
1738 /// Return the cost of this VPWidenCallRecipe.
1739 InstructionCost computeCost(ElementCount VF,
1740 VPCostContext &Ctx) const override;
1741
1745
1748
1749protected:
1750#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1751 /// Print the recipe.
1752 void printRecipe(raw_ostream &O, const Twine &Indent,
1753 VPSlotTracker &SlotTracker) const override;
1754#endif
1755};
1756
1757/// A recipe representing a sequence of load -> update -> store as part of
1758/// a histogram operation. This means there may be aliasing between vector
1759/// lanes, which is handled by the llvm.experimental.vector.histogram family
1760/// of intrinsics. The only update operations currently supported are
1761/// 'add' and 'sub' where the other term is loop-invariant.
1763 /// Opcode of the update operation, currently either add or sub.
1764 unsigned Opcode;
1765
1766public:
1767 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1769 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1770
1771 ~VPHistogramRecipe() override = default;
1772
1774 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1775 }
1776
1777 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1778
1779 /// Produce a vectorized histogram operation.
1780 void execute(VPTransformState &State) override;
1781
1782 /// Return the cost of this VPHistogramRecipe.
1784 VPCostContext &Ctx) const override;
1785
1786 unsigned getOpcode() const { return Opcode; }
1787
1788 /// Return the mask operand if one was provided, or a null pointer if all
1789 /// lanes should be executed unconditionally.
1790 VPValue *getMask() const {
1791 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1792 }
1793
1794protected:
1795#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
1797 void printRecipe(raw_ostream &O, const Twine &Indent,
1798 VPSlotTracker &SlotTracker) const override;
1799#endif
1800};
1801
1802/// A recipe for widening select instructions. Supports both wide vector and
1803/// single-scalar conditions, matching the behavior of LLVM IR's select
1804/// instruction.
1806 public VPIRMetadata {
1808 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1809 DebugLoc DL = {})
1810 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
1811 VPIRMetadata(MD) {
1812 setUnderlyingValue(SI);
1813 }
1814
1815 ~VPWidenSelectRecipe() override = default;
1816
1819 operands(), *this, *this, getDebugLoc());
1820 }
1821
1822 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1823
1824 /// Produce a widened version of the select instruction.
1825 void execute(VPTransformState &State) override;
1826
1827 /// Return the cost of this VPWidenSelectRecipe.
1828 InstructionCost computeCost(ElementCount VF,
1829 VPCostContext &Ctx) const override;
1830
1831 unsigned getOpcode() const { return Instruction::Select; }
1832
1833 VPValue *getCond() const {
1834 return getOperand(0);
1835 }
1836
1837 /// Returns true if the recipe only uses the first lane of operand \p Op.
1838 bool usesFirstLaneOnly(const VPValue *Op) const override {
1840 "Op must be an operand of the recipe");
1841 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1842 }
1843
1844protected:
1845#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1846 /// Print the recipe.
1847 void printRecipe(raw_ostream &O, const Twine &Indent,
1848 VPSlotTracker &SlotTracker) const override;
1849#endif
1850};
1851
1852/// A recipe for handling GEP instructions.
1854 Type *SourceElementTy;
1855
1856 bool isPointerLoopInvariant() const {
1857 return getOperand(0)->isDefinedOutsideLoopRegions();
1858 }
1859
1860 bool isIndexLoopInvariant(unsigned I) const {
1861 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1862 }
1863
1864public:
1866 const VPIRFlags &Flags = {},
1868 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1869 SourceElementTy(GEP->getSourceElementType()) {
1870 setUnderlyingValue(GEP);
1872 (void)Metadata;
1874 assert(Metadata.empty() && "unexpected metadata on GEP");
1875 }
1876
1877 ~VPWidenGEPRecipe() override = default;
1878
1881 operands(), *this, getDebugLoc());
1882 }
1883
1884 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1885
1886 /// This recipe generates a GEP instruction.
1887 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1888
1889 /// Generate the gep nodes.
1890 void execute(VPTransformState &State) override;
1891
1892 Type *getSourceElementType() const { return SourceElementTy; }
1893
1894 /// Return the cost of this VPWidenGEPRecipe.
1896 VPCostContext &Ctx) const override {
1897 // TODO: Compute accurate cost after retiring the legacy cost model.
1898 return 0;
1899 }
1900
1901 /// Returns true if the recipe only uses the first lane of operand \p Op.
1902 bool usesFirstLaneOnly(const VPValue *Op) const override;
1903
1904protected:
1905#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1906 /// Print the recipe.
1907 void printRecipe(raw_ostream &O, const Twine &Indent,
1908 VPSlotTracker &SlotTracker) const override;
1909#endif
1910};
1911
1912/// A recipe to compute a pointer to the last element of each part of a widened
1913/// memory access for widened memory accesses of IndexedTy. Used for
1914/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1916 public VPUnrollPartAccessor<2> {
1917 Type *IndexedTy;
1918
1919 /// The constant stride of the pointer computed by this recipe, expressed in
1920 /// units of IndexedTy.
1921 int64_t Stride;
1922
1923public:
1925 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1926 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1927 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1928 IndexedTy(IndexedTy), Stride(Stride) {
1929 assert(Stride < 0 && "Stride must be negative");
1930 }
1931
1932 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1933
1935 const VPValue *getVFValue() const { return getOperand(1); }
1936
1937 void execute(VPTransformState &State) override;
1938
1939 bool usesFirstLaneOnly(const VPValue *Op) const override {
1941 "Op must be an operand of the recipe");
1942 return true;
1943 }
1944
/// Return the cost of this VPVectorEndPointerRecipe.
1947 VPCostContext &Ctx) const override {
1948 // TODO: Compute accurate cost after retiring the legacy cost model.
1949 return 0;
1950 }
1951
1952 /// Returns true if the recipe only uses the first part of operand \p Op.
1953 bool usesFirstPartOnly(const VPValue *Op) const override {
1955 "Op must be an operand of the recipe");
1956 assert(getNumOperands() <= 2 && "must have at most two operands");
1957 return true;
1958 }
1959
1961 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1962 Stride, getGEPNoWrapFlags(),
1963 getDebugLoc());
1964 }
1965
1966protected:
1967#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1968 /// Print the recipe.
1969 void printRecipe(raw_ostream &O, const Twine &Indent,
1970 VPSlotTracker &SlotTracker) const override;
1971#endif
1972};
1973
1974/// A recipe to compute the pointers for widened memory accesses of SourceElementTy.
1976 public VPUnrollPartAccessor<1> {
1977 Type *SourceElementTy;
1978
1979public:
1980 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1982 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1983 GEPFlags, DL),
1984 SourceElementTy(SourceElementTy) {}
1985
1986 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1987
1988 void execute(VPTransformState &State) override;
1989
/// Return the source element type this recipe was constructed with.
1990 Type *getSourceElementType() const { return SourceElementTy; }
1991
1992 bool usesFirstLaneOnly(const VPValue *Op) const override {
1994 "Op must be an operand of the recipe");
1995 return true;
1996 }
1997
1998 /// Returns true if the recipe only uses the first part of operand \p Op.
1999 bool usesFirstPartOnly(const VPValue *Op) const override {
2001 "Op must be an operand of the recipe");
2002 assert(getNumOperands() <= 2 && "must have at most two operands");
2003 return true;
2004 }
2005
2007 return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2009 }
2010
2011 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
2012 /// this is only accurate after the VPlan has been unrolled.
2013 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
2014
2015 /// Return the cost of this VPVectorPointerRecipe.
2017 VPCostContext &Ctx) const override {
2018 // TODO: Compute accurate cost after retiring the legacy cost model.
2019 return 0;
2020 }
2021
2022protected:
2023#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2024 /// Print the recipe.
2025 void printRecipe(raw_ostream &O, const Twine &Indent,
2026 VPSlotTracker &SlotTracker) const override;
2027#endif
2028};
2029
2030/// A pure virtual base class for all recipes modeling header phis, including
2031/// phis for first order recurrences, pointer inductions and reductions. The
2032/// start value is the first operand of the recipe and the incoming value from
2033/// the backedge is the second operand.
2034///
2035/// Inductions are modeled using the following sub-classes:
2036/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2037/// starting at a specified value (zero for the main vector loop, the resume
2038/// value for the epilogue vector loop) and stepping by 1. The induction
2039/// controls exiting of the vector loop by comparing against the vector trip
2040/// count. Produces a single scalar PHI for the induction value per
2041/// iteration.
2042/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2043/// floating point inductions with arbitrary start and step values. Produces
2044/// a vector PHI per-part.
2045/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2046/// value of an IV with different start and step values. Produces a single
2047/// scalar value per iteration
2048/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2049/// canonical or derived induction.
2050/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2051/// pointer induction. Produces either a vector PHI per-part or scalar values
2052/// per-lane based on the canonical induction.
2054 public VPPhiAccessors {
2055protected:
2056 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2057 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2058 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2059 UnderlyingInstr, DL) {}
2060
2061 const VPRecipeBase *getAsRecipe() const override { return this; }
2062
2063public:
2064 ~VPHeaderPHIRecipe() override = default;
2065
2066 /// Method to support type inquiry through isa, cast, and dyn_cast.
2067 static inline bool classof(const VPRecipeBase *R) {
2068 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2069 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2070 }
/// Type inquiry on a VPValue: applies the VPHeaderPHIRecipe check to the
/// recipe returned by V->getDefiningRecipe().
/// NOTE(review): the defining recipe is passed to isa<> without a null check,
/// whereas the VPValue overloads of other recipe classes in this file test
/// for null first -- confirm callers only pass recipe-defined values.
2071 static inline bool classof(const VPValue *V) {
2072 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2073 }
/// Type inquiry on a VPSingleDefRecipe: views \p R as a VPRecipeBase and
/// applies the VPHeaderPHIRecipe check to it.
2074 static inline bool classof(const VPSingleDefRecipe *R) {
2075 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2076 }
2077
2078 /// Generate the phi nodes.
2079 void execute(VPTransformState &State) override = 0;
2080
2081 /// Return the cost of this header phi recipe.
2083 VPCostContext &Ctx) const override;
2084
2085 /// Returns the start value of the phi, if one is set.
2087 return getNumOperands() == 0 ? nullptr : getOperand(0);
2088 }
2090 return getNumOperands() == 0 ? nullptr : getOperand(0);
2091 }
2092
2093 /// Update the start value of the recipe.
2095
2096 /// Returns the incoming value from the loop backedge.
2098 return getOperand(1);
2099 }
2100
2101 /// Update the incoming value from the loop backedge.
2103
2104 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2105 /// to be a recipe.
2107 return *getBackedgeValue()->getDefiningRecipe();
2108 }
2109
2110protected:
2111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2112 /// Print the recipe.
2113 void printRecipe(raw_ostream &O, const Twine &Indent,
2114 VPSlotTracker &SlotTracker) const override = 0;
2115#endif
2116};
2117
2118/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2119/// VPWidenPointerInductionRecipe), providing shared functionality, including
2120/// retrieving the step value, induction descriptor and original phi node.
2122 const InductionDescriptor &IndDesc;
2123
2124public:
2125 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2126 VPValue *Step, const InductionDescriptor &IndDesc,
2127 DebugLoc DL)
2128 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2129 addOperand(Step);
2130 }
2131
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true if the VPDef ID of \p R is VPWidenIntOrFpInductionSC or
/// VPWidenPointerInductionSC.
2132 static inline bool classof(const VPRecipeBase *R) {
2133 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2134 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2135 }
2136
/// Type inquiry on a VPValue: returns false if \p V has no defining recipe,
/// otherwise the result of the VPRecipeBase overload on that recipe.
2137 static inline bool classof(const VPValue *V) {
2138 auto *R = V->getDefiningRecipe();
2139 return R && classof(R);
2140 }
2141
/// Type inquiry on a VPSingleDefRecipe: forwards to the VPRecipeBase overload.
2142 static inline bool classof(const VPSingleDefRecipe *R) {
2143 return classof(static_cast<const VPRecipeBase *>(R));
2144 }
2145
2146 void execute(VPTransformState &State) override = 0;
2147
2148 /// Returns the step value of the induction.
2150 const VPValue *getStepValue() const { return getOperand(1); }
2151
2152 /// Update the step value of the recipe.
2153 void setStepValue(VPValue *V) { setOperand(1, V); }
2154
2156 const VPValue *getVFValue() const { return getOperand(2); }
2157
2158 /// Returns the number of incoming values, also number of incoming blocks.
2159 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2160 /// incoming value, its start value.
2161 unsigned getNumIncoming() const override { return 1; }
2162
2164
2165 /// Returns the induction descriptor for the recipe.
2166 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2167
2169 // TODO: All operands of base recipe must exist and be at same index in
2170 // derived recipe.
2172 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2173 }
2174
2176 // TODO: All operands of base recipe must exist and be at same index in
2177 // derived recipe.
2179 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2180 }
2181
2182 /// Returns true if the recipe only uses the first lane of operand \p Op.
2183 bool usesFirstLaneOnly(const VPValue *Op) const override {
2185 "Op must be an operand of the recipe");
2186 // The recipe creates its own wide start value, so it only requests the
2187 // first lane of the operand.
2188 // TODO: Remove once creating the start value is modeled separately.
2189 return Op == getStartValue() || Op == getStepValue();
2190 }
2191};
2192
2193/// A recipe for handling phi nodes of integer and floating-point inductions,
2194/// producing their vector values. This is an abstract recipe and must be
2195/// converted to concrete recipes before executing.
2197 public VPIRFlags {
2198 TruncInst *Trunc;
2199
2200 // If this recipe is unrolled it will have 2 additional operands.
2201 bool isUnrolled() const { return getNumOperands() == 5; }
2202
2203public:
2205 VPValue *VF, const InductionDescriptor &IndDesc,
2206 const VPIRFlags &Flags, DebugLoc DL)
2207 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2208 Step, IndDesc, DL),
2209 VPIRFlags(Flags), Trunc(nullptr) {
2210 addOperand(VF);
2211 }
2212
2214 VPValue *VF, const InductionDescriptor &IndDesc,
2215 TruncInst *Trunc, const VPIRFlags &Flags,
2216 DebugLoc DL)
2217 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2218 Step, IndDesc, DL),
2219 VPIRFlags(Flags), Trunc(Trunc) {
2220 addOperand(VF);
2222 (void)Metadata;
2223 if (Trunc)
2225 assert(Metadata.empty() && "unexpected metadata on Trunc");
2226 }
2227
2229
2235
2236 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2237
/// Must not be called: this recipe cannot be executed and is expected to have
/// been expanded beforehand, so reaching this point is llvm_unreachable.
2238 void execute(VPTransformState &State) override {
2239 llvm_unreachable("cannot execute this recipe, should be expanded via "
2240 "expandVPWidenIntOrFpInductionRecipe");
2241 }
2242
2244 // If the recipe has been unrolled return the VPValue for the induction
2245 // increment.
2246 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2247 }
2248
2249 /// Returns the number of incoming values, also number of incoming blocks.
2250 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2251 /// incoming value, its start value.
2252 unsigned getNumIncoming() const override { return 1; }
2253
2254 /// Returns the first defined value as TruncInst, if it is one or nullptr
2255 /// otherwise.
2256 TruncInst *getTruncInst() { return Trunc; }
2257 const TruncInst *getTruncInst() const { return Trunc; }
2258
2259 /// Returns true if the induction is canonical, i.e. starting at 0 and
2260 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2261 /// same type as the canonical induction.
2262 bool isCanonical() const;
2263
2264 /// Returns the scalar type of the induction.
2266 return Trunc ? Trunc->getType()
2268 }
2269
2270 /// Returns the VPValue representing the value of this induction at
2271 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2272 /// take place.
2274 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2275 }
2276
2277protected:
2278#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2279 /// Print the recipe.
2280 void printRecipe(raw_ostream &O, const Twine &Indent,
2281 VPSlotTracker &SlotTracker) const override;
2282#endif
2283};
2284
2286public:
2287 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2288 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2289 /// VF*UF.
2291 VPValue *NumUnrolledElems,
2292 const InductionDescriptor &IndDesc, DebugLoc DL)
2293 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2294 Step, IndDesc, DL) {
2295 addOperand(NumUnrolledElems);
2296 }
2297
2299
2305
2306 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2307
2308 /// This recipe cannot be executed directly; it must be expanded first.
2309 void execute(VPTransformState &State) override {
2310 llvm_unreachable("cannot execute this recipe, should be expanded via "
2311 "expandVPWidenPointerInduction");
2312 };
2313
2314 /// Returns true if only scalar values will be generated.
2315 bool onlyScalarsGenerated(bool IsScalable);
2316
2317protected:
2318#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2319 /// Print the recipe.
2320 void printRecipe(raw_ostream &O, const Twine &Indent,
2321 VPSlotTracker &SlotTracker) const override;
2322#endif
2323};
2324
2325/// A recipe for widened phis. Incoming values are operands of the recipe and
2326/// their operand index corresponds to the incoming predecessor block. If the
2327/// recipe is placed in an entry block to a (non-replicate) region, it must have
2328/// exactly 2 incoming values, the first from the predecessor of the region and
2329/// the second from the exiting block of the region.
2331 public VPPhiAccessors {
2332 /// Name to use for the generated IR instruction for the widened phi.
2333 std::string Name;
2334
2335public:
2336 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2337 /// debug location \p DL.
2338 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2339 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2340 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2341 if (Start)
2342 addOperand(Start);
2343 }
2344
2347 getOperand(0), getDebugLoc(), Name);
2349 C->addOperand(Op);
2350 return C;
2351 }
2352
2353 ~VPWidenPHIRecipe() override = default;
2354
2355 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2356
2357 /// Generate the phi/select nodes.
2358 void execute(VPTransformState &State) override;
2359
2360protected:
2361#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2362 /// Print the recipe.
2363 void printRecipe(raw_ostream &O, const Twine &Indent,
2364 VPSlotTracker &SlotTracker) const override;
2365#endif
2366
2367 const VPRecipeBase *getAsRecipe() const override { return this; }
2368};
2369
2370/// A recipe for handling first-order recurrence phis. The start value is the
2371/// first operand of the recipe and the incoming value from the backedge is the
2372/// second operand.
2375 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2376
2377 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2378
2383
2384 void execute(VPTransformState &State) override;
2385
2386 /// Return the cost of this first-order recurrence phi recipe.
2388 VPCostContext &Ctx) const override;
2389
2390 /// Returns true if the recipe only uses the first lane of operand \p Op.
2391 bool usesFirstLaneOnly(const VPValue *Op) const override {
2393 "Op must be an operand of the recipe");
2394 return Op == getStartValue();
2395 }
2396
2397protected:
2398#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2399 /// Print the recipe.
2400 void printRecipe(raw_ostream &O, const Twine &Indent,
2401 VPSlotTracker &SlotTracker) const override;
2402#endif
2403};
2404
2405/// Possible variants of a reduction.
2406
2407/// This reduction is ordered and in-loop.
2408struct RdxOrdered {};
2409/// This reduction is in-loop.
2410struct RdxInLoop {};
2411/// This reduction is unordered with the partial result scaled down by some
2412/// factor.
2415};
2416using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2417
/// Build the ReductionStyle for a reduction. If \p Ordered is set the result
/// is RdxOrdered (\p Ordered must only be set together with \p InLoop, which
/// is asserted); otherwise, if \p InLoop is set the result is RdxInLoop;
/// otherwise the result is RdxUnordered with \p ScaleFactor as its
/// VFScaleFactor. \p ScaleFactor is not used for the first two styles.
2418inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2419 unsigned ScaleFactor) {
2420 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2421 if (Ordered)
2422 return RdxOrdered{};
2423 if (InLoop)
2424 return RdxInLoop{};
2425 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2426}
2427
2428/// A recipe for handling reduction phis. The start value is the first operand
2429/// of the recipe and the incoming value from the backedge is the second
2430/// operand.
2432 public VPUnrollPartAccessor<2> {
2433 /// The recurrence kind of the reduction.
2434 const RecurKind Kind;
2435
2436 ReductionStyle Style;
2437
2438 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2439 /// patterns for argmin/argmax).
2440 /// TODO: Also support cases where the phi itself has a single use, but its
2441 /// compare has multiple uses.
2442 bool HasUsesOutsideReductionChain;
2443
2444public:
2445 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2447 ReductionStyle Style,
2448 bool HasUsesOutsideReductionChain = false)
2449 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2450 Style(Style),
2451 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {}
2452
2453 ~VPReductionPHIRecipe() override = default;
2454
2456 auto *R = new VPReductionPHIRecipe(
2458 *getOperand(0), Style, HasUsesOutsideReductionChain);
2459 R->addOperand(getBackedgeValue());
2460 return R;
2461 }
2462
2463 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2464
2465 /// Generate the phi/select nodes.
2466 void execute(VPTransformState &State) override;
2467
2468 /// Get the factor that the VF of this recipe's output should be scaled by, or
2469 /// 1 if it isn't scaled.
2470 unsigned getVFScaleFactor() const {
2471 auto *Partial = std::get_if<RdxUnordered>(&Style);
2472 return Partial ? Partial->VFScaleFactor : 1;
2473 }
2474
2475 /// Returns the number of incoming values, also number of incoming blocks.
2476 /// A reduction phi has two incoming values: the start value and the
2477 /// incoming value from the backedge.
2478 unsigned getNumIncoming() const override { return 2; }
2479
2480 /// Returns the recurrence kind of the reduction.
2481 RecurKind getRecurrenceKind() const { return Kind; }
2482
2483 /// Returns true, if the phi is part of an ordered reduction.
2484 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2485
2486 /// Returns true if the phi is part of an in-loop reduction.
2487 bool isInLoop() const {
2488 return std::holds_alternative<RdxInLoop>(Style) ||
2489 std::holds_alternative<RdxOrdered>(Style);
2490 }
2491
2492 /// Returns true if the reduction outputs a vector with a scaled down VF.
2493 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2494
2495 /// Returns true, if the phi is part of a multi-use reduction.
2497 return HasUsesOutsideReductionChain;
2498 }
2499
2500 /// Returns true if the recipe only uses the first lane of operand \p Op.
2501 bool usesFirstLaneOnly(const VPValue *Op) const override {
2503 "Op must be an operand of the recipe");
2504 return isOrdered() || isInLoop();
2505 }
2506
2507protected:
2508#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2509 /// Print the recipe.
2510 void printRecipe(raw_ostream &O, const Twine &Indent,
2511 VPSlotTracker &SlotTracker) const override;
2512#endif
2513};
2514
2515/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2516/// instructions.
2518public:
2519 /// The blend operation is a User of the incoming values and of their
2520 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2521 /// be omitted (implied by passing an odd number of operands) in which case
2522 /// all other incoming values are merged into it.
2524 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2525 assert(Operands.size() > 0 && "Expected at least one operand!");
2526 }
2527
2532
2533 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2534
2535 /// A normalized blend is one that has an odd number of operands, whereby the
2536 /// first operand does not have an associated mask.
2537 bool isNormalized() const { return getNumOperands() % 2; }
2538
2539 /// Return the number of incoming values, taking into account when normalized
2540 /// the first incoming value will have no mask.
2541 unsigned getNumIncomingValues() const {
2542 return (getNumOperands() + isNormalized()) / 2;
2543 }
2544
2545 /// Return incoming value number \p Idx.
2546 VPValue *getIncomingValue(unsigned Idx) const {
2547 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2548 }
2549
2550 /// Return mask number \p Idx.
2551 VPValue *getMask(unsigned Idx) const {
2552 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2553 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2554 }
2555
2556 /// Set mask number \p Idx to \p V.
2557 void setMask(unsigned Idx, VPValue *V) {
2558 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2559 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2560 }
2561
/// Must not be called: blends are expected to have been expanded before
/// execution, so reaching this point is llvm_unreachable.
2562 void execute(VPTransformState &State) override {
2563 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2564 }
2565
2566 /// Return the cost of this VPBlendRecipe.
2567 InstructionCost computeCost(ElementCount VF,
2568 VPCostContext &Ctx) const override;
2569
2570 /// Returns true if the recipe only uses the first lane of operand \p Op.
2571 bool usesFirstLaneOnly(const VPValue *Op) const override {
2573 "Op must be an operand of the recipe");
2574 // Recursing through Blend recipes only, must terminate at header phi's the
2575 // latest.
2576 return all_of(users(),
2577 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2578 }
2579
2580protected:
2581#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2582 /// Print the recipe.
2583 void printRecipe(raw_ostream &O, const Twine &Indent,
2584 VPSlotTracker &SlotTracker) const override;
2585#endif
2586};
2587
2588/// A common base class for interleaved memory operations.
2589/// An Interleaved memory operation is a memory access method that combines
2590/// multiple strided loads/stores into a single wide load/store with shuffles.
2591/// The first operand is the start address. The optional operands are, in order,
2592/// the stored values and the mask.
2594 public VPIRMetadata {
2596
2597 /// Indicates if the interleave group is in a conditional block and requires a
2598 /// mask.
2599 bool HasMask = false;
2600
2601 /// Indicates if gaps between members of the group need to be masked out or if
2602 /// unused gaps can be loaded speculatively.
2603 bool NeedsMaskForGaps = false;
2604
2605protected:
2606 VPInterleaveBase(const unsigned char SC,
2608 ArrayRef<VPValue *> Operands,
2609 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2610 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2611 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2612 NeedsMaskForGaps(NeedsMaskForGaps) {
2613 // TODO: extend the masked interleaved-group support to reversed access.
2614 assert((!Mask || !IG->isReverse()) &&
2615 "Reversed masked interleave-group not supported.");
2616 for (unsigned I = 0; I < IG->getFactor(); ++I)
2617 if (Instruction *Inst = IG->getMember(I)) {
2618 if (Inst->getType()->isVoidTy())
2619 continue;
2620 new VPValue(Inst, this);
2621 }
2622
2623 for (auto *SV : StoredValues)
2624 addOperand(SV);
2625 if (Mask) {
2626 HasMask = true;
2627 addOperand(Mask);
2628 }
2629 }
2630
2631public:
2632 VPInterleaveBase *clone() override = 0;
2633
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true if the VPDef ID of \p R is VPInterleaveSC or VPInterleaveEVLSC.
2634 static inline bool classof(const VPRecipeBase *R) {
2635 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2636 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2637 }
2638
/// Type inquiry on a VPUser: returns false if dyn_cast of \p U to
/// VPRecipeBase yields null, otherwise the result of the VPRecipeBase
/// overload.
2639 static inline bool classof(const VPUser *U) {
2640 auto *R = dyn_cast<VPRecipeBase>(U);
2641 return R && classof(R);
2642 }
2643
2644 /// Return the address accessed by this recipe.
2645 VPValue *getAddr() const {
2646 return getOperand(0); // Address is the 1st, mandatory operand.
2647 }
2648
2649 /// Return the mask used by this recipe. Note that a full mask is represented
2650 /// by a nullptr.
2651 VPValue *getMask() const {
2652 // Mask is optional and the last operand.
2653 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2654 }
2655
2656 /// Return true if the access needs a mask because of the gaps.
2657 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2658
2660
/// Return the insert position reported by the interleave group IG.
2661 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2662
/// Must not be called on the base class; reaching this point is
/// llvm_unreachable.
2663 void execute(VPTransformState &State) override {
2664 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2665 }
2666
2667 /// Return the cost of this recipe.
2668 InstructionCost computeCost(ElementCount VF,
2669 VPCostContext &Ctx) const override;
2670
2671 /// Returns true if the recipe only uses the first lane of operand \p Op.
2672 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2673
2674 /// Returns the number of stored operands of this interleave group. Returns 0
2675 /// for load interleave groups.
2676 virtual unsigned getNumStoreOperands() const = 0;
2677
2678 /// Return the VPValues stored by this interleave group. If it is a load
2679 /// interleave group, return an empty ArrayRef.
2681 return ArrayRef<VPValue *>(op_end() -
2682 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2684 }
2685};
2686
2687/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2688/// or stores into one wide load/store and shuffles. The first operand of a
2689/// VPInterleave recipe is the address, followed by the stored values, followed
2690/// by an optional mask.
2692public:
2694 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2695 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2696 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2697 NeedsMaskForGaps, MD, DL) {}
2698
2699 ~VPInterleaveRecipe() override = default;
2700
2704 needsMaskForGaps(), *this, getDebugLoc());
2705 }
2706
2707 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2708
2709 /// Generate the wide load or store, and shuffles.
2710 void execute(VPTransformState &State) override;
2711
2712 bool usesFirstLaneOnly(const VPValue *Op) const override {
2714 "Op must be an operand of the recipe");
2715 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2716 }
2717
/// Number of stored values: every operand except the address and, when
/// getMask() returns non-null, the trailing mask.
2718 unsigned getNumStoreOperands() const override {
2719 return getNumOperands() - (getMask() ? 2 : 1);
2720 }
2721
2722protected:
2723#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2724 /// Print the recipe.
2725 void printRecipe(raw_ostream &O, const Twine &Indent,
2726 VPSlotTracker &SlotTracker) const override;
2727#endif
2728};
2729
2730/// A recipe for interleaved memory operations with vector-predication
2731/// intrinsics. The first operand is the address, the second operand is the
2732/// explicit vector length. Stored values and mask are optional operands.
2734public:
2736 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2737 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2738 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2739 R.getDebugLoc()) {
2740 assert(!getInterleaveGroup()->isReverse() &&
2741 "Reversed interleave-group with tail folding is not supported.");
2742 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2743 "supported for scalable vector.");
2744 }
2745
2746 ~VPInterleaveEVLRecipe() override = default;
2747
2749 llvm_unreachable("cloning not implemented yet");
2750 }
2751
2752 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2753
2754 /// The VPValue of the explicit vector length.
2755 VPValue *getEVL() const { return getOperand(1); }
2756
2757 /// Generate the wide load or store, and shuffles.
2758 void execute(VPTransformState &State) override;
2759
2760 /// The recipe only uses the first lane of the address, and EVL operand.
2761 bool usesFirstLaneOnly(const VPValue *Op) const override {
2763 "Op must be an operand of the recipe");
2764 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2765 Op == getEVL();
2766 }
2767
/// Number of stored values: every operand except the address, the EVL and,
/// when getMask() returns non-null, the trailing mask.
2768 unsigned getNumStoreOperands() const override {
2769 return getNumOperands() - (getMask() ? 3 : 2);
2770 }
2771
2772protected:
2773#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2774 /// Print the recipe.
2775 void printRecipe(raw_ostream &O, const Twine &Indent,
2776 VPSlotTracker &SlotTracker) const override;
2777#endif
2778};
2779
2780/// A recipe to represent inloop, ordered or partial reduction operations. It
2781/// performs a reduction on a vector operand into a scalar (vector in the case
2782/// of a partial reduction) value, and adds the result to a chain. The Operands
2783/// are {ChainOp, VecOp, [Condition]}.
2785
2786 /// The recurrence kind for the reduction in question.
2787 RecurKind RdxKind;
2788 /// Whether the reduction is conditional.
2789 bool IsConditional = false;
2790 ReductionStyle Style;
2791
2792protected:
2793 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2795 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2796 ReductionStyle Style, DebugLoc DL)
2797 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2798 Style(Style) {
2799 if (CondOp) {
2800 IsConditional = true;
2801 addOperand(CondOp);
2802 }
2804 }
2805
2806public:
2808 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2810 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2811 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2812 DL) {}
2813
2815 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2817 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2818 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2819 DL) {}
2820
2821 ~VPReductionRecipe() override = default;
2822
2824 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2826 getCondOp(), Style, getDebugLoc());
2827 }
2828
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true if the VPDef ID of \p R is VPReductionSC or VPReductionEVLSC.
2829 static inline bool classof(const VPRecipeBase *R) {
2830 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2831 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2832 }
2833
/// Type inquiry on a VPUser: returns false if dyn_cast of \p U to
/// VPRecipeBase yields null, otherwise the result of the VPRecipeBase
/// overload.
2834 static inline bool classof(const VPUser *U) {
2835 auto *R = dyn_cast<VPRecipeBase>(U);
2836 return R && classof(R);
2837 }
2838
/// Type inquiry on a VPValue: returns false if \p VPV has no defining recipe,
/// otherwise the result of the VPRecipeBase overload on that recipe.
2839 static inline bool classof(const VPValue *VPV) {
2840 const VPRecipeBase *R = VPV->getDefiningRecipe();
2841 return R && classof(R);
2842 }
2843
/// Type inquiry on a VPSingleDefRecipe: forwards to the VPRecipeBase overload.
2844 static inline bool classof(const VPSingleDefRecipe *R) {
2845 return classof(static_cast<const VPRecipeBase *>(R));
2846 }
2847
2848 /// Generate the reduction in the loop.
2849 void execute(VPTransformState &State) override;
2850
2851 /// Return the cost of VPReductionRecipe.
2852 InstructionCost computeCost(ElementCount VF,
2853 VPCostContext &Ctx) const override;
2854
2855 /// Return the recurrence kind for the in-loop reduction.
2856 RecurKind getRecurrenceKind() const { return RdxKind; }
2857 /// Return true if the in-loop reduction is ordered.
2858 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2859 /// Return true if the in-loop reduction is conditional.
2860 bool isConditional() const { return IsConditional; };
2861 /// Returns true if the reduction outputs a vector with a scaled down VF.
2862 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2863 /// Returns true if the reduction is in-loop.
2864 bool isInLoop() const {
2865 return std::holds_alternative<RdxInLoop>(Style) ||
2866 std::holds_alternative<RdxOrdered>(Style);
2867 }
2868 /// The VPValue of the scalar Chain being accumulated.
2869 VPValue *getChainOp() const { return getOperand(0); }
2870 /// The VPValue of the vector value to be reduced.
2871 VPValue *getVecOp() const { return getOperand(1); }
2872 /// The VPValue of the condition for the block.
2874 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2875 }
2876 /// Get the factor that the VF of this recipe's output should be scaled by, or
2877 /// 1 if it isn't scaled.
2878 unsigned getVFScaleFactor() const {
2879 auto *Partial = std::get_if<RdxUnordered>(&Style);
2880 return Partial ? Partial->VFScaleFactor : 1;
2881 }
2882
2883protected:
2884#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2885 /// Print the recipe.
2886 void printRecipe(raw_ostream &O, const Twine &Indent,
2887 VPSlotTracker &SlotTracker) const override;
2888#endif
2889};
2890
2891/// A recipe to represent inloop reduction operations with vector-predication
2892/// intrinsics, performing a reduction on a vector operand with the explicit
2893/// vector length (EVL) into a scalar value, and adding the result to a chain.
2894/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2896public:
2900 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2901 R.getFastMathFlags(),
2903 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2904 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2905
2906 ~VPReductionEVLRecipe() override = default;
2907
2909 llvm_unreachable("cloning not implemented yet");
2910 }
2911
2912 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2913
2914 /// Generate the reduction in the loop
2915 void execute(VPTransformState &State) override;
2916
2917 /// The VPValue of the explicit vector length.
2918 VPValue *getEVL() const { return getOperand(2); }
2919
2920 /// Returns true if the recipe only uses the first lane of operand \p Op.
2921 bool usesFirstLaneOnly(const VPValue *Op) const override {
2923 "Op must be an operand of the recipe");
2924 return Op == getEVL();
2925 }
2926
2927protected:
2928#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2929 /// Print the recipe.
2930 void printRecipe(raw_ostream &O, const Twine &Indent,
2931 VPSlotTracker &SlotTracker) const override;
2932#endif
2933};
2934
2935/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2936/// copies of the original scalar type, one per lane, instead of producing a
2937/// single copy of widened type for all lanes. If the instruction is known to be
2938/// a single scalar, only one copy, per lane zero, will be generated.
2940 public VPIRMetadata {
2941 /// Indicator if only a single replica per lane is needed.
2942 bool IsSingleScalar;
2943
2944 /// Indicator if the replicas are also predicated.
2945 bool IsPredicated;
2946
2947public:
2949 bool IsSingleScalar, VPValue *Mask = nullptr,
2950 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2951 DebugLoc DL = DebugLoc::getUnknown())
2952 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2953 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2954 IsPredicated(Mask) {
2955 setUnderlyingValue(I);
2956 if (Mask)
2957 addOperand(Mask);
2958 }
2959
2960 ~VPReplicateRecipe() override = default;
2961
2963 auto *Copy = new VPReplicateRecipe(
2964 getUnderlyingInstr(), operands(), IsSingleScalar,
2965 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2966 Copy->transferFlags(*this);
2967 return Copy;
2968 }
2969
2970 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2971
2972 /// Generate replicas of the desired Ingredient. Replicas will be generated
2973 /// for all parts and lanes unless a specific part and lane are specified in
2974 /// the \p State.
2975 void execute(VPTransformState &State) override;
2976
2977 /// Return the cost of this VPReplicateRecipe.
2978 InstructionCost computeCost(ElementCount VF,
2979 VPCostContext &Ctx) const override;
2980
2981 bool isSingleScalar() const { return IsSingleScalar; }
2982
2983 bool isPredicated() const { return IsPredicated; }
2984
2985 /// Returns true if the recipe only uses the first lane of operand \p Op.
2986 bool usesFirstLaneOnly(const VPValue *Op) const override {
2988 "Op must be an operand of the recipe");
2989 return isSingleScalar();
2990 }
2991
2992 /// Returns true if the recipe uses scalars of operand \p Op.
2993 bool usesScalars(const VPValue *Op) const override {
2995 "Op must be an operand of the recipe");
2996 return true;
2997 }
2998
2999 /// Returns true if the recipe is used by a widened recipe via an intervening
3000 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3001 /// in a vector.
3002 bool shouldPack() const;
3003
3004 /// Return the mask of a predicated VPReplicateRecipe.
3006 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3007 return getOperand(getNumOperands() - 1);
3008 }
3009
3010 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3011
3012protected:
3013#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3014 /// Print the recipe.
3015 void printRecipe(raw_ostream &O, const Twine &Indent,
3016 VPSlotTracker &SlotTracker) const override;
3017#endif
3018};
3019
3020/// A recipe for generating conditional branches on the bits of a mask.
3022public:
3024 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3025
3028 }
3029
3030 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3031
3032 /// Generate the extraction of the appropriate bit from the block mask and the
3033 /// conditional branch.
3034 void execute(VPTransformState &State) override;
3035
3036 /// Return the cost of this VPBranchOnMaskRecipe.
3037 InstructionCost computeCost(ElementCount VF,
3038 VPCostContext &Ctx) const override;
3039
3040#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3041 /// Print the recipe.
3042 void printRecipe(raw_ostream &O, const Twine &Indent,
3043 VPSlotTracker &SlotTracker) const override {
3044 O << Indent << "BRANCH-ON-MASK ";
3046 }
3047#endif
3048
3049 /// Returns true if the recipe uses scalars of operand \p Op.
3050 bool usesScalars(const VPValue *Op) const override {
3052 "Op must be an operand of the recipe");
3053 return true;
3054 }
3055};
3056
3057/// A recipe to combine multiple recipes into a single 'expression' recipe,
3058/// which should be considered a single entity for cost-modeling and transforms.
3059/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3060/// expression recipes, before execute. The individual expression recipes are
3061/// completely disconnected from the def-use graph of other recipes not part of
3062/// the expression. Def-use edges between pairs of expression recipes remain
3063/// intact, whereas every edge between an expression recipe and a recipe outside
3064/// the expression is elevated to connect the non-expression recipe with the
3065/// VPExpressionRecipe itself.
3066class VPExpressionRecipe : public VPSingleDefRecipe {
3067 /// Recipes included in this VPExpressionRecipe. This could contain
3068 /// duplicates.
3069 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3070
3071 /// Temporary VPValues used for external operands of the expression, i.e.
3072 /// operands not defined by recipes in the expression.
3073 SmallVector<VPValue *> LiveInPlaceholders;
3074
3075 enum class ExpressionTypes {
3076 /// Represents an inloop extended reduction operation, performing a
3077 /// reduction on an extended vector operand into a scalar value, and adding
3078 /// the result to a chain.
3079 ExtendedReduction,
3080 /// Represent an inloop multiply-accumulate reduction, multiplying the
3081 /// extended vector operands, performing a reduction.add on the result, and
3082 /// adding the scalar result to a chain.
3083 ExtMulAccReduction,
3084 /// Represent an inloop multiply-accumulate reduction, multiplying the
3085 /// vector operands, performing a reduction.add on the result, and adding
3086 /// the scalar result to a chain.
3087 MulAccReduction,
3088 /// Represent an inloop multiply-accumulate reduction, multiplying the
3089 /// extended vector operands, negating the multiplication, performing a
3090 /// reduction.add on the result, and adding the scalar result to a chain.
3091 ExtNegatedMulAccReduction,
3092 };
3093
3094 /// Type of the expression.
3095 ExpressionTypes ExpressionType;
3096
3097 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3098 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3099 /// in the expression) are replaced by temporary VPValues and the original
3100 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3101 /// as needed (excluding last) to ensure they are only used by other recipes
3102 /// in the expression.
3103 VPExpressionRecipe(ExpressionTypes ExpressionType,
3104 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3105
3106public:
3108 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3110 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3113 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3114 {Ext0, Ext1, Mul, Red}) {}
3117 VPReductionRecipe *Red)
3118 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3119 {Ext0, Ext1, Mul, Sub, Red}) {
3120 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3121 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3122 "Expected an add reduction");
3123 assert(getNumOperands() >= 3 && "Expected at least three operands");
3124 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3125 assert(SubConst && SubConst->getValue() == 0 &&
3126 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3127 }
3128
3130 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3131 for (auto *R : reverse(ExpressionRecipes)) {
3132 if (ExpressionRecipesSeen.insert(R).second)
3133 delete R;
3134 }
3135 for (VPValue *T : LiveInPlaceholders)
3136 delete T;
3137 }
3138
3139 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3140
3141 VPExpressionRecipe *clone() override {
3142 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3143 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3144 for (auto *R : ExpressionRecipes)
3145 NewExpressiondRecipes.push_back(R->clone());
3146 for (auto *New : NewExpressiondRecipes) {
3147 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3148 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3149 // Update placeholder operands in the cloned recipe to use the external
3150 // operands, to be internalized when the cloned expression is constructed.
3151 for (const auto &[Placeholder, OutsideOp] :
3152 zip(LiveInPlaceholders, operands()))
3153 New->replaceUsesOfWith(Placeholder, OutsideOp);
3154 }
3155 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3156 }
3157
3158 /// Return the VPValue to use to infer the result type of the recipe.
3160 unsigned OpIdx =
3161 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3162 : 1;
3163 return getOperand(getNumOperands() - OpIdx);
3164 }
3165
3166 /// Insert the recipes of the expression back into the VPlan, directly before
3167 /// the current recipe. Leaves the expression recipe empty, which must be
3168 /// removed before codegen.
3169 void decompose();
3170
3171 unsigned getVFScaleFactor() const {
3172 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3173 return PR ? PR->getVFScaleFactor() : 1;
3174 }
3175
3176 /// Method for generating code, must not be called as this recipe is abstract.
3177 void execute(VPTransformState &State) override {
3178 llvm_unreachable("recipe must be removed before execute");
3179 }
3180
3182 VPCostContext &Ctx) const override;
3183
3184 /// Returns true if this expression contains recipes that may read from or
3185 /// write to memory.
3186 bool mayReadOrWriteMemory() const;
3187
3188 /// Returns true if this expression contains recipes that may have side
3189 /// effects.
3190 bool mayHaveSideEffects() const;
3191
3192 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3193 bool isSingleScalar() const;
3194
3195protected:
3196#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3197 /// Print the recipe.
3198 void printRecipe(raw_ostream &O, const Twine &Indent,
3199 VPSlotTracker &SlotTracker) const override;
3200#endif
3201};
3202
3203/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3204/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3205/// order to merge values that are set under such a branch and feed their uses.
3206/// The phi nodes can be scalar or vector depending on the users of the value.
3207/// This recipe works in concert with VPBranchOnMaskRecipe.
3209public:
3210 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3211 /// nodes after merging back from a Branch-on-Mask.
3213 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3214 ~VPPredInstPHIRecipe() override = default;
3215
3217 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3218 }
3219
3220 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3221
3222 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3223 /// retain SSA form.
3224 void execute(VPTransformState &State) override;
3225
3226 /// Return the cost of this VPPredInstPHIRecipe.
3228 VPCostContext &Ctx) const override {
3229 // TODO: Compute accurate cost after retiring the legacy cost model.
3230 return 0;
3231 }
3232
3233 /// Returns true if the recipe uses scalars of operand \p Op.
3234 bool usesScalars(const VPValue *Op) const override {
3236 "Op must be an operand of the recipe");
3237 return true;
3238 }
3239
3240protected:
3241#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3242 /// Print the recipe.
3243 void printRecipe(raw_ostream &O, const Twine &Indent,
3244 VPSlotTracker &SlotTracker) const override;
3245#endif
3246};
3247
3248/// A common base class for widening memory operations. An optional mask can be
3249/// provided as the last operand.
3251 public VPIRMetadata {
3252protected:
3254
3255 /// Alignment information for this memory access.
3257
3258 /// Whether the accessed addresses are consecutive.
3260
3261 /// Whether the consecutive accessed addresses are in reverse order.
3263
3264 /// Whether the memory access is masked.
3265 bool IsMasked = false;
3266
3267 void setMask(VPValue *Mask) {
3268 assert(!IsMasked && "cannot re-set mask");
3269 if (!Mask)
3270 return;
3271 addOperand(Mask);
3272 IsMasked = true;
3273 }
3274
3275 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3276 std::initializer_list<VPValue *> Operands,
3277 bool Consecutive, bool Reverse,
3278 const VPIRMetadata &Metadata, DebugLoc DL)
3279 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3281 Reverse(Reverse) {
3282 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3284 "Reversed acccess without VPVectorEndPointerRecipe address?");
3285 }
3286
3287public:
3289 llvm_unreachable("cloning not supported");
3290 }
3291
3292 static inline bool classof(const VPRecipeBase *R) {
3293 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3294 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3295 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3296 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3297 }
3298
3299 static inline bool classof(const VPUser *U) {
3300 auto *R = dyn_cast<VPRecipeBase>(U);
3301 return R && classof(R);
3302 }
3303
3304 /// Return whether the loaded-from / stored-to addresses are consecutive.
3305 bool isConsecutive() const { return Consecutive; }
3306
3307 /// Return whether the consecutive loaded/stored addresses are in reverse
3308 /// order.
3309 bool isReverse() const { return Reverse; }
3310
3311 /// Return the address accessed by this recipe.
3312 VPValue *getAddr() const { return getOperand(0); }
3313
3314 /// Returns true if the recipe is masked.
3315 bool isMasked() const { return IsMasked; }
3316
3317 /// Return the mask used by this recipe. Note that a full mask is represented
3318 /// by a nullptr.
3319 VPValue *getMask() const {
3320 // Mask is optional and therefore the last operand.
3321 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3322 }
3323
3324 /// Returns the alignment of the memory access.
3325 Align getAlign() const { return Alignment; }
3326
3327 /// Generate the wide load/store.
3328 void execute(VPTransformState &State) override {
3329 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3330 }
3331
3332 /// Return the cost of this VPWidenMemoryRecipe.
3333 InstructionCost computeCost(ElementCount VF,
3334 VPCostContext &Ctx) const override;
3335
3337};
3338
3339/// A recipe for widening load operations, using the address to load from and an
3340/// optional mask.
3342 public VPValue {
3344 bool Consecutive, bool Reverse,
3345 const VPIRMetadata &Metadata, DebugLoc DL)
3346 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3347 Reverse, Metadata, DL),
3348 VPValue(this, &Load) {
3349 setMask(Mask);
3350 }
3351
3354 getMask(), Consecutive, Reverse, *this,
3355 getDebugLoc());
3356 }
3357
3358 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3359
3360 /// Generate a wide load or gather.
3361 void execute(VPTransformState &State) override;
3362
3363 /// Returns true if the recipe only uses the first lane of operand \p Op.
3364 bool usesFirstLaneOnly(const VPValue *Op) const override {
3366 "Op must be an operand of the recipe");
3367 // Widened, consecutive load operations only demand the first lane of
3368 // their address.
3369 return Op == getAddr() && isConsecutive();
3370 }
3371
3372protected:
3373#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3374 /// Print the recipe.
3375 void printRecipe(raw_ostream &O, const Twine &Indent,
3376 VPSlotTracker &SlotTracker) const override;
3377#endif
3378};
3379
3380/// A recipe for widening load operations with vector-predication intrinsics,
3381/// using the address to load from, the explicit vector length and an optional
3382/// mask.
3383struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3385 VPValue *Mask)
3386 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3387 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3388 L.getDebugLoc()),
3389 VPValue(this, &getIngredient()) {
3390 setMask(Mask);
3391 }
3392
3393 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3394
3395 /// Return the EVL operand.
3396 VPValue *getEVL() const { return getOperand(1); }
3397
3398 /// Generate the wide load or gather.
3399 void execute(VPTransformState &State) override;
3400
3401 /// Return the cost of this VPWidenLoadEVLRecipe.
3403 VPCostContext &Ctx) const override;
3404
3405 /// Returns true if the recipe only uses the first lane of operand \p Op.
3406 bool usesFirstLaneOnly(const VPValue *Op) const override {
3408 "Op must be an operand of the recipe");
3409 // Widened loads only demand the first lane of EVL and consecutive loads
3410 // only demand the first lane of their address.
3411 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3412 }
3413
3414protected:
3415#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3416 /// Print the recipe.
3417 void printRecipe(raw_ostream &O, const Twine &Indent,
3418 VPSlotTracker &SlotTracker) const override;
3419#endif
3420};
3421
3422/// A recipe for widening store operations, using the stored value, the address
3423/// to store to and an optional mask.
3425 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3426 VPValue *Mask, bool Consecutive, bool Reverse,
3427 const VPIRMetadata &Metadata, DebugLoc DL)
3428 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3429 Consecutive, Reverse, Metadata, DL) {
3430 setMask(Mask);
3431 }
3432
3438
3439 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3440
3441 /// Return the value stored by this recipe.
3442 VPValue *getStoredValue() const { return getOperand(1); }
3443
3444 /// Generate a wide store or scatter.
3445 void execute(VPTransformState &State) override;
3446
3447 /// Returns true if the recipe only uses the first lane of operand \p Op.
3448 bool usesFirstLaneOnly(const VPValue *Op) const override {
3450 "Op must be an operand of the recipe");
3451 // Widened, consecutive stores only demand the first lane of their address,
3452 // unless the same operand is also stored.
3453 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3454 }
3455
3456protected:
3457#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3458 /// Print the recipe.
3459 void printRecipe(raw_ostream &O, const Twine &Indent,
3460 VPSlotTracker &SlotTracker) const override;
3461#endif
3462};
3463
3464/// A recipe for widening store operations with vector-predication intrinsics,
3465/// using the value to store, the address to store to, the explicit vector
3466/// length and an optional mask.
3469 VPValue *Mask)
3470 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3471 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3472 S.isReverse(), S, S.getDebugLoc()) {
3473 setMask(Mask);
3474 }
3475
3476 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3477
3478 /// Return the value stored by this recipe.
3479 VPValue *getStoredValue() const { return getOperand(1); }
3480
3481 /// Return the EVL operand.
3482 VPValue *getEVL() const { return getOperand(2); }
3483
3484 /// Generate the wide store or scatter.
3485 void execute(VPTransformState &State) override;
3486
3487 /// Return the cost of this VPWidenStoreEVLRecipe.
3489 VPCostContext &Ctx) const override;
3490
3491 /// Returns true if the recipe only uses the first lane of operand \p Op.
3492 bool usesFirstLaneOnly(const VPValue *Op) const override {
3494 "Op must be an operand of the recipe");
3495 if (Op == getEVL()) {
3496 assert(getStoredValue() != Op && "unexpected store of EVL");
3497 return true;
3498 }
3499 // Widened, consecutive memory operations only demand the first lane of
3500 // their address, unless the same operand is also stored. The latter can
3501 // happen with opaque pointers.
3502 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3503 }
3504
3505protected:
3506#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3507 /// Print the recipe.
3508 void printRecipe(raw_ostream &O, const Twine &Indent,
3509 VPSlotTracker &SlotTracker) const override;
3510#endif
3511};
3512
3513/// Recipe to expand a SCEV expression.
3515 const SCEV *Expr;
3516
3517public:
3519 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3520
3521 ~VPExpandSCEVRecipe() override = default;
3522
3523 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3524
3525 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3526
3527 void execute(VPTransformState &State) override {
3528 llvm_unreachable("SCEV expressions must be expanded before final execute");
3529 }
3530
3531 /// Return the cost of this VPExpandSCEVRecipe.
3533 VPCostContext &Ctx) const override {
3534 // TODO: Compute accurate cost after retiring the legacy cost model.
3535 return 0;
3536 }
3537
3538 const SCEV *getSCEV() const { return Expr; }
3539
3540protected:
3541#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3542 /// Print the recipe.
3543 void printRecipe(raw_ostream &O, const Twine &Indent,
3544 VPSlotTracker &SlotTracker) const override;
3545#endif
3546};
3547
3548/// Canonical scalar induction phi of the vector loop. Starting at the specified
3549/// start value (either 0 or the resume value when vectorizing the epilogue
3550/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3551/// canonical induction variable.
3553public:
3555 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3556
3557 ~VPCanonicalIVPHIRecipe() override = default;
3558
3560 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3561 R->addOperand(getBackedgeValue());
3562 return R;
3563 }
3564
3565 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3566
3567 void execute(VPTransformState &State) override {
3568 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3569 "scalar phi recipe");
3570 }
3571
3572 /// Returns the scalar type of the induction.
3574 return getStartValue()->getLiveInIRValue()->getType();
3575 }
3576
3577 /// Returns true if the recipe only uses the first lane of operand \p Op.
3578 bool usesFirstLaneOnly(const VPValue *Op) const override {
3580 "Op must be an operand of the recipe");
3581 return true;
3582 }
3583
3584 /// Returns true if the recipe only uses the first part of operand \p Op.
3585 bool usesFirstPartOnly(const VPValue *Op) const override {
3587 "Op must be an operand of the recipe");
3588 return true;
3589 }
3590
3591 /// Return the cost of this VPCanonicalIVPHIRecipe.
3593 VPCostContext &Ctx) const override {
3594 // For now, match the behavior of the legacy cost model.
3595 return 0;
3596 }
3597
3598protected:
3599#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3600 /// Print the recipe.
3601 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3602 VPSlotTracker &SlotTracker) const override;
3603#endif
3604};
3605
3606/// A recipe for generating the active lane mask for the vector loop that is
3607/// used to predicate the vector operations.
3608/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3609/// remove VPActiveLaneMaskPHIRecipe.
3611public:
3613 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3614 DL) {}
3615
3616 ~VPActiveLaneMaskPHIRecipe() override = default;
3617
3620 if (getNumOperands() == 2)
3621 R->addOperand(getOperand(1));
3622 return R;
3623 }
3624
3625 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3626
3627 /// Generate the active lane mask phi of the vector loop.
3628 void execute(VPTransformState &State) override;
3629
3630protected:
3631#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3632 /// Print the recipe.
3633 void printRecipe(raw_ostream &O, const Twine &Indent,
3634 VPSlotTracker &SlotTracker) const override;
3635#endif
3636};
3637
3638/// A recipe for generating the phi node for the current index of elements,
3639/// adjusted in accordance with EVL value. It starts at the start value of the
3640/// canonical induction and gets incremented by EVL in each iteration of the
3641/// vector loop.
3643public:
3645 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3646
3647 ~VPEVLBasedIVPHIRecipe() override = default;
3648
3650 llvm_unreachable("cloning not implemented yet");
3651 }
3652
3653 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3654
3655 void execute(VPTransformState &State) override {
3656 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3657 "scalar phi recipe");
3658 }
3659
3660 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3662 VPCostContext &Ctx) const override {
3663 // For now, match the behavior of the legacy cost model.
3664 return 0;
3665 }
3666
3667 /// Returns true if the recipe only uses the first lane of operand \p Op.
3668 bool usesFirstLaneOnly(const VPValue *Op) const override {
3670 "Op must be an operand of the recipe");
3671 return true;
3672 }
3673
3674protected:
3675#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3676 /// Print the recipe.
3677 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3678 VPSlotTracker &SlotTracker) const override;
3679#endif
3680};
3681
3682/// A Recipe for widening the canonical induction variable of the vector loop.
3684 public VPUnrollPartAccessor<1> {
3685public:
3687 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3688
3689 ~VPWidenCanonicalIVRecipe() override = default;
3690
3695
3696 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3697
3698 /// Generate a canonical vector induction variable of the vector loop, with
3699 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3700 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3701 void execute(VPTransformState &State) override;
3702
3703 /// Return the cost of this VPWidenCanonicalIVRecipe.
3705 VPCostContext &Ctx) const override {
3706 // TODO: Compute accurate cost after retiring the legacy cost model.
3707 return 0;
3708 }
3709
3710protected:
3711#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3712 /// Print the recipe.
3713 void printRecipe(raw_ostream &O, const Twine &Indent,
3714 VPSlotTracker &SlotTracker) const override;
3715#endif
3716};
3717
3718/// A recipe for converting the input value \p IV value to the corresponding
3719/// value of an IV with different start and step values, using Start + IV *
3720/// Step.
3722 /// Kind of the induction.
3724 /// If not nullptr, the floating point induction binary operator. Must be set
3725 /// for floating point inductions.
3726 const FPMathOperator *FPBinOp;
3727
3728 /// Name to use for the generated IR instruction for the derived IV.
3729 std::string Name;
3730
3731public:
3733 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3734 const Twine &Name = "")
3736 IndDesc.getKind(),
3737 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3738 Start, CanonicalIV, Step, Name) {}
3739
3741 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3742 VPValue *Step, const Twine &Name = "")
3743 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3744 FPBinOp(FPBinOp), Name(Name.str()) {}
3745
3746 ~VPDerivedIVRecipe() override = default;
3747
3749 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3750 getStepValue());
3751 }
3752
3753 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3754
3755 /// Generate the transformed value of the induction at offset StartValue (1.
3756 /// operand) + IV (2. operand) * StepValue (3. operand).
3757 void execute(VPTransformState &State) override;
3758
3759 /// Return the cost of this VPDerivedIVRecipe.
3761 VPCostContext &Ctx) const override {
3762 // TODO: Compute accurate cost after retiring the legacy cost model.
3763 return 0;
3764 }
3765
3767 return getStartValue()->getLiveInIRValue()->getType();
3768 }
3769
3770 VPValue *getStartValue() const { return getOperand(0); }
3771 VPValue *getStepValue() const { return getOperand(2); }
3772
3773 /// Returns true if the recipe only uses the first lane of operand \p Op.
3774 bool usesFirstLaneOnly(const VPValue *Op) const override {
3776 "Op must be an operand of the recipe");
3777 return true;
3778 }
3779
3780protected:
3781#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3782 /// Print the recipe.
3783 void printRecipe(raw_ostream &O, const Twine &Indent,
3784 VPSlotTracker &SlotTracker) const override;
3785#endif
3786};
3787
3788/// A recipe for handling phi nodes of integer and floating-point inductions,
3789/// producing their scalar values.
3791 public VPUnrollPartAccessor<3> {
3792 Instruction::BinaryOps InductionOpcode;
3793
3794public:
3797 DebugLoc DL)
3798 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3799 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3800 InductionOpcode(Opcode) {}
3801
3803 VPValue *Step, VPValue *VF,
3806 IV, Step, VF, IndDesc.getInductionOpcode(),
3807 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3808 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3809 : FastMathFlags(),
3810 DL) {}
3811
3812 ~VPScalarIVStepsRecipe() override = default;
3813
3815 return new VPScalarIVStepsRecipe(
3816 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3818 getDebugLoc());
3819 }
3820
3821 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3822 /// this is only accurate after the VPlan has been unrolled.
3823 bool isPart0() const { return getUnrollPart(*this) == 0; }
3824
3825 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3826
3827 /// Generate the scalarized versions of the phi node as needed by their users.
3828 void execute(VPTransformState &State) override;
3829
3830 /// Return the cost of this VPScalarIVStepsRecipe.
3832 VPCostContext &Ctx) const override {
3833 // TODO: Compute accurate cost after retiring the legacy cost model.
3834 return 0;
3835 }
3836
3837 VPValue *getStepValue() const { return getOperand(1); }
3838
3839 /// Returns true if the recipe only uses the first lane of operand \p Op.
3840 bool usesFirstLaneOnly(const VPValue *Op) const override {
3842 "Op must be an operand of the recipe");
3843 return true;
3844 }
3845
3846protected:
3847#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3848 /// Print the recipe.
3849 void printRecipe(raw_ostream &O, const Twine &Indent,
3850 VPSlotTracker &SlotTracker) const override;
3851#endif
3852};
3853
3854/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3855/// types implementing VPPhiAccessors. Used by isa<> & co.
3857 static inline bool isPossible(const VPRecipeBase *f) {
3858 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3860 }
3861};
3862/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3863/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3864template <typename SrcTy>
3865struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3866
3868
3869 /// doCast is used by cast<>.
3870 static inline VPPhiAccessors *doCast(SrcTy R) {
3871 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3872 switch (R->getVPDefID()) {
3873 case VPDef::VPInstructionSC:
3874 return cast<VPPhi>(R);
3875 case VPDef::VPIRInstructionSC:
3876 return cast<VPIRPhi>(R);
3877 case VPDef::VPWidenPHISC:
3878 return cast<VPWidenPHIRecipe>(R);
3879 default:
3880 return cast<VPHeaderPHIRecipe>(R);
3881 }
3882 }());
3883 }
3884
3885 /// doCastIfPossible is used by dyn_cast<>.
3886 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3887 if (!Self::isPossible(f))
3888 return nullptr;
3889 return doCast(f);
3890 }
3891};
3892template <>
3895template <>
3898
3899/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3900/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3901namespace detail {
3902template <typename DstTy, typename RecipeBasePtrTy>
3903static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3904 switch (R->getVPDefID()) {
3905 case VPDef::VPInstructionSC:
3906 return cast<VPInstruction>(R);
3907 case VPDef::VPWidenSC:
3908 return cast<VPWidenRecipe>(R);
3909 case VPDef::VPWidenCastSC:
3910 return cast<VPWidenCastRecipe>(R);
3911 case VPDef::VPWidenIntrinsicSC:
3913 case VPDef::VPWidenCallSC:
3914 return cast<VPWidenCallRecipe>(R);
3915 case VPDef::VPWidenSelectSC:
3916 return cast<VPWidenSelectRecipe>(R);
3917 case VPDef::VPReplicateSC:
3918 return cast<VPReplicateRecipe>(R);
3919 case VPDef::VPInterleaveSC:
3920 case VPDef::VPInterleaveEVLSC:
3921 return cast<VPInterleaveBase>(R);
3922 case VPDef::VPWidenLoadSC:
3923 case VPDef::VPWidenLoadEVLSC:
3924 case VPDef::VPWidenStoreSC:
3925 case VPDef::VPWidenStoreEVLSC:
3926 return cast<VPWidenMemoryRecipe>(R);
3927 default:
3928 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3929 }
3930}
3931} // namespace detail
3932
3933/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3934/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3935template <typename DstTy, typename SrcTy>
3936struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3937 static inline bool isPossible(SrcTy R) {
3938 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3939 // also handled in castToVPIRMetadata.
3945 }
3946
3947 using RetTy = DstTy *;
3948
3949 /// doCast is used by cast<>.
3950 static inline RetTy doCast(SrcTy R) {
3952 }
3953
3954 /// doCastIfPossible is used by dyn_cast<>.
3955 static inline RetTy doCastIfPossible(SrcTy R) {
3956 if (!isPossible(R))
3957 return nullptr;
3958 return doCast(R);
3959 }
3960};
3961template <>
3964template <>
3967
3968/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3969/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3970/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3971class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3972 friend class VPlan;
3973
3974 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3975 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3976 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3977 if (Recipe)
3978 appendRecipe(Recipe);
3979 }
3980
3981public:
3983
3984protected:
3985 /// The VPRecipes held in the order of output instructions to generate.
3987
3988 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3989 : VPBlockBase(BlockSC, Name.str()) {}
3990
3991public:
3992 ~VPBasicBlock() override {
3993 while (!Recipes.empty())
3994 Recipes.pop_back();
3995 }
3996
3997 /// Instruction iterators...
4002
4003 //===--------------------------------------------------------------------===//
4004 /// Recipe iterator methods
4005 ///
4006 inline iterator begin() { return Recipes.begin(); }
4007 inline const_iterator begin() const { return Recipes.begin(); }
4008 inline iterator end() { return Recipes.end(); }
4009 inline const_iterator end() const { return Recipes.end(); }
4010
4011 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4012 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4013 inline reverse_iterator rend() { return Recipes.rend(); }
4014 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4015
4016 inline size_t size() const { return Recipes.size(); }
4017 inline bool empty() const { return Recipes.empty(); }
4018 inline const VPRecipeBase &front() const { return Recipes.front(); }
4019 inline VPRecipeBase &front() { return Recipes.front(); }
4020 inline const VPRecipeBase &back() const { return Recipes.back(); }
4021 inline VPRecipeBase &back() { return Recipes.back(); }
4022
4023 /// Returns a reference to the list of recipes.
4025
4026 /// Returns a pointer to a member of the recipe list.
4027 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4028 return &VPBasicBlock::Recipes;
4029 }
4030
4031 /// Method to support type inquiry through isa, cast, and dyn_cast.
4032 static inline bool classof(const VPBlockBase *V) {
4033 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4034 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4035 }
4036
4037 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4038 assert(Recipe && "No recipe to append.");
4039 assert(!Recipe->Parent && "Recipe already in VPlan");
4040 Recipe->Parent = this;
4041 Recipes.insert(InsertPt, Recipe);
4042 }
4043
4044 /// Augment the existing recipes of a VPBasicBlock with an additional
4045 /// \p Recipe as the last recipe.
4046 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4047
4048 /// The method which generates the output IR instructions that correspond to
4049 /// this VPBasicBlock, thereby "executing" the VPlan.
4050 void execute(VPTransformState *State) override;
4051
4052 /// Return the cost of this VPBasicBlock.
4053 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4054
4055 /// Return the position of the first non-phi node recipe in the block.
4056 iterator getFirstNonPhi();
4057
4058 /// Returns an iterator range over the PHI-like recipes in the block.
4062
4063 /// Split current block at \p SplitAt by inserting a new block between the
4064 /// current block and its successors and moving all recipes starting at
4065 /// SplitAt to the new block. Returns the new block.
4066 VPBasicBlock *splitAt(iterator SplitAt);
4067
4068 VPRegionBlock *getEnclosingLoopRegion();
4069 const VPRegionBlock *getEnclosingLoopRegion() const;
4070
4071#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4072 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4073 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4074 ///
4075 /// Note that the numbering is applied to the whole VPlan, so printing
4076 /// individual blocks is consistent with the whole VPlan printing.
4077 void print(raw_ostream &O, const Twine &Indent,
4078 VPSlotTracker &SlotTracker) const override;
4079 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4080#endif
4081
4082 /// If the block has multiple successors, return the branch recipe terminating
4083 /// the block. If there is no successor or only a single one, return nullptr.
4084 VPRecipeBase *getTerminator();
4085 const VPRecipeBase *getTerminator() const;
4086
4087 /// Returns true if the block is exiting its parent region.
4088 bool isExiting() const;
4089
4090 /// Clone the current block and its recipes, without updating the operands of
4091 /// the cloned recipes.
4092 VPBasicBlock *clone() override;
4093
4094 /// Returns the predecessor block at index \p Idx with the predecessors as per
4095 /// the corresponding plain CFG. If the block is an entry block to a region,
4096 /// the first predecessor is the single predecessor of a region, and the
4097 /// second predecessor is the exiting block of the region.
4098 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4099
4100protected:
4101 /// Execute the recipes in the IR basic block \p BB.
4102 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4103
4104 /// Connect the predecessors of this VPBB in the VPlan CFG to the IR basic block
4105 /// generated for this VPBB.
4106 void connectToPredecessors(VPTransformState &State);
4107
4108private:
4109 /// Create an IR BasicBlock to hold the output instructions generated by this
4110 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4111 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4112};
4113
4114inline const VPBasicBlock *
4116 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4117}
4118
4119/// A special type of VPBasicBlock that wraps an existing IR basic block.
4120/// Recipes of the block get added before the first non-phi instruction in the
4121/// wrapped block.
4122 /// Note: VPlan uses VPIRBasicBlocks to wrap the original preheader as well as
4123 /// the header and exit blocks of the original scalar loop.
4124class VPIRBasicBlock : public VPBasicBlock {
4125 friend class VPlan;
4126
4127 BasicBlock *IRBB;
4128
4129 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4130 VPIRBasicBlock(BasicBlock *IRBB)
4131 : VPBasicBlock(VPIRBasicBlockSC,
4132 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4133 IRBB(IRBB) {}
4134
4135public:
4136 ~VPIRBasicBlock() override = default;
4137
4138 static inline bool classof(const VPBlockBase *V) {
4139 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4140 }
4141
4142 /// The method which generates the output IR instructions that correspond to
4143 /// this VPBasicBlock, thereby "executing" the VPlan.
4144 void execute(VPTransformState *State) override;
4145
4146 VPIRBasicBlock *clone() override;
4147
4148 BasicBlock *getIRBasicBlock() const { return IRBB; }
4149};
4150
4151/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4152/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4153/// A VPRegionBlock may indicate that its contents are to be replicated several
4154/// times. This is designed to support predicated scalarization, in which a
4155/// scalar if-then code structure needs to be generated VF * UF times. Having
4156/// this replication indicator helps to keep a single model for multiple
4157/// candidate VF's. The actual replication takes place only once the desired VF
4158/// and UF have been determined.
4159class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4160 friend class VPlan;
4161
4162 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4163 VPBlockBase *Entry;
4164
4165 /// Hold the Single Exiting block of the SESE region modelled by the
4166 /// VPRegionBlock.
4167 VPBlockBase *Exiting;
4168
4169 /// An indicator whether this region is to generate multiple replicated
4170 /// instances of output IR corresponding to its VPBlockBases.
4171 bool IsReplicator;
4172
4173 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4174 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4175 const std::string &Name = "", bool IsReplicator = false)
4176 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4177 IsReplicator(IsReplicator) {
4178 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4179 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4180 Entry->setParent(this);
4181 Exiting->setParent(this);
4182 }
4183 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4184 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4185 IsReplicator(IsReplicator) {}
4186
4187public:
4188 ~VPRegionBlock() override = default;
4189
4190 /// Method to support type inquiry through isa, cast, and dyn_cast.
4191 static inline bool classof(const VPBlockBase *V) {
4192 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4193 }
4194
4195 const VPBlockBase *getEntry() const { return Entry; }
4196 VPBlockBase *getEntry() { return Entry; }
4197
4198 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4199 /// EntryBlock must have no predecessors.
4200 void setEntry(VPBlockBase *EntryBlock) {
4201 assert(EntryBlock->getPredecessors().empty() &&
4202 "Entry block cannot have predecessors.");
4203 Entry = EntryBlock;
4204 EntryBlock->setParent(this);
4205 }
4206
4207 const VPBlockBase *getExiting() const { return Exiting; }
4208 VPBlockBase *getExiting() { return Exiting; }
4209
4210 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4211 /// ExitingBlock must have no successors.
4212 void setExiting(VPBlockBase *ExitingBlock) {
4213 assert(ExitingBlock->getSuccessors().empty() &&
4214 "Exit block cannot have successors.");
4215 Exiting = ExitingBlock;
4216 ExitingBlock->setParent(this);
4217 }
4218
4219 /// Returns the pre-header VPBasicBlock of the loop region.
4221 assert(!isReplicator() && "should only get pre-header of loop regions");
4222 return getSinglePredecessor()->getExitingBasicBlock();
4223 }
4224
4225 /// An indicator whether this region is to generate multiple replicated
4226 /// instances of output IR corresponding to its VPBlockBases.
4227 bool isReplicator() const { return IsReplicator; }
4228
4229 /// The method which generates the output IR instructions that correspond to
4230 /// this VPRegionBlock, thereby "executing" the VPlan.
4231 void execute(VPTransformState *State) override;
4232
4233 /// Return the cost of this region.
4234 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4235
4236#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4237 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4238 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4239 /// consecutive numbers.
4240 ///
4241 /// Note that the numbering is applied to the whole VPlan, so printing
4242 /// individual regions is consistent with the whole VPlan printing.
4243 void print(raw_ostream &O, const Twine &Indent,
4244 VPSlotTracker &SlotTracker) const override;
4245 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4246#endif
4247
4248 /// Clone all blocks in the single-entry single-exit region of the block and
4249 /// their recipes without updating the operands of the cloned recipes.
4250 VPRegionBlock *clone() override;
4251
4252 /// Remove the current region from its VPlan, connecting its predecessor to
4253 /// its entry, and its exiting block to its successor.
4254 void dissolveToCFGLoop();
4255
4256 /// Returns the canonical induction recipe of the region.
4258 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4259 if (EntryVPBB->empty()) {
4260 // VPlan native path. TODO: Unify both code paths.
4261 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4262 }
4263 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4264 }
4266 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4267 }
4268
4269 /// Return the type of the canonical IV for loop regions.
4270 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4271 const Type *getCanonicalIVType() const {
4272 return getCanonicalIV()->getScalarType();
4273 }
4274};
4275
4277 return getParent()->getParent();
4278}
4279
4281 return getParent()->getParent();
4282}
4283
4284 /// VPlan models a candidate for vectorization, encoding various decisions taken
4285/// to produce efficient output IR, including which branches, basic-blocks and
4286/// output IR instructions to generate, and their cost. VPlan holds a
4287/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4288/// VPBasicBlock.
4289class VPlan {
4290 friend class VPlanPrinter;
4291 friend class VPSlotTracker;
4292
4293 /// VPBasicBlock corresponding to the original preheader. Used to place
4294 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4295 /// rest of VPlan execution.
4296 /// When this VPlan is used for the epilogue vector loop, the entry will be
4297 /// replaced by a new entry block created during skeleton creation.
4298 VPBasicBlock *Entry;
4299
4300 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4301 VPIRBasicBlock *ScalarHeader;
4302
4303 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4304 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4305 /// e.g. if the scalar epilogue always executes.
4307
4308 /// Holds the VFs applicable to this VPlan.
4310
4311 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4312 /// any UF.
4314
4315 /// Holds the name of the VPlan, for printing.
4316 std::string Name;
4317
4318 /// Represents the trip count of the original loop, for folding
4319 /// the tail.
4320 VPValue *TripCount = nullptr;
4321
4322 /// Represents the backedge taken count of the original loop, for folding
4323 /// the tail. It equals TripCount - 1.
4324 VPValue *BackedgeTakenCount = nullptr;
4325
4326 /// Represents the vector trip count.
4327 VPValue VectorTripCount;
4328
4329 /// Represents the vectorization factor of the loop.
4330 VPValue VF;
4331
4332 /// Represents the loop-invariant VF * UF of the vector loop region.
4333 VPValue VFxUF;
4334
4335 /// Holds a mapping between Values and their corresponding VPValue inside
4336 /// VPlan.
4337 Value2VPValueTy Value2VPValue;
4338
4339 /// Contains all the external definitions created for this VPlan. External
4340 /// definitions are VPValues that hold a pointer to their underlying IR.
4342
4343 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4344 /// VPlan is destroyed.
4345 SmallVector<VPBlockBase *> CreatedBlocks;
4346
4347 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4348 /// wrapping the original header of the scalar loop.
4349 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4350 : Entry(Entry), ScalarHeader(ScalarHeader) {
4351 Entry->setPlan(this);
4352 assert(ScalarHeader->getNumSuccessors() == 0 &&
4353 "scalar header must be a leaf node");
4354 }
4355
4356public:
4357 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4358 /// original preheader and scalar header of \p L, to be used as entry and
4359 /// scalar header blocks of the new VPlan.
4360 VPlan(Loop *L);
4361
4362 /// Construct a VPlan with a new VPBasicBlock named "preheader" as entry and a
4363 /// VPIRBasicBlock wrapping \p ScalarHeaderBB. No trip count is set here.
4364 VPlan(BasicBlock *ScalarHeaderBB) {
4365 setEntry(createVPBasicBlock("preheader"));
4366 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4367 }
4368
4370
4372 Entry = VPBB;
4373 VPBB->setPlan(this);
4374 }
4375
4376 /// Generate the IR code for this VPlan.
4377 void execute(VPTransformState *State);
4378
4379 /// Return the cost of this plan.
4381
4382 VPBasicBlock *getEntry() { return Entry; }
4383 const VPBasicBlock *getEntry() const { return Entry; }
4384
4385 /// Returns the preheader of the vector loop region, if one exists, or null
4386 /// otherwise.
4388 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4389 return VectorRegion
4390 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4391 : nullptr;
4392 }
4393
4394 /// Returns the VPRegionBlock of the vector loop.
4397
4398 /// Returns the 'middle' block of the plan, that is the block that selects
4399 /// whether to execute the scalar tail loop or the exit block from the loop
4400 /// latch. If there is an early exit from the vector loop, the middle block
4401 /// conceptually has the early exit block as third successor, split across 2
4402 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4403 /// tail loop or the exit block. If the scalar tail loop or exit block are
4404 /// known to always execute, the middle block may branch directly to that
4405 /// block. This function cannot be called once the vector loop region has been
4406 /// removed.
4408 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4409 assert(
4410 LoopRegion &&
4411 "cannot call the function after vector loop region has been removed");
4412 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4413 if (RegionSucc->getSingleSuccessor() ||
4414 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4415 return RegionSucc;
4416 // There is an early exit. The successor of RegionSucc is the middle block.
4417 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4418 }
4419
4421 return const_cast<VPlan *>(this)->getMiddleBlock();
4422 }
4423
4424 /// Return the VPBasicBlock for the preheader of the scalar loop.
4426 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4427 }
4428
4429 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4430 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4431
4432 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4433 /// the original scalar loop.
4434 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4435
4436 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4437 /// exit block.
4439
4440 /// Returns true if \p VPBB is an exit block.
4441 bool isExitBlock(VPBlockBase *VPBB);
4442
4443 /// The trip count of the original loop.
4445 assert(TripCount && "trip count needs to be set before accessing it");
4446 return TripCount;
4447 }
4448
4449 /// Set the trip count assuming it is currently null; if it is not - use
4450 /// resetTripCount().
4451 void setTripCount(VPValue *NewTripCount) {
4452 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4453 TripCount = NewTripCount;
4454 }
4455
4456 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4457 /// the original trip count have been replaced.
4458 void resetTripCount(VPValue *NewTripCount) {
4459 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4460 "TripCount must be set when resetting");
4461 TripCount = NewTripCount;
4462 }
4463
4464 /// The backedge taken count of the original loop.
4466 if (!BackedgeTakenCount)
4467 BackedgeTakenCount = new VPValue();
4468 return BackedgeTakenCount;
4469 }
4470 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4471
4472 /// The vector trip count.
4473 VPValue &getVectorTripCount() { return VectorTripCount; }
4474
4475 /// Returns the VF of the vector loop region.
4476 VPValue &getVF() { return VF; }
4477 const VPValue &getVF() const { return VF; }
4478
4479 /// Returns VF * UF of the vector loop region.
4480 VPValue &getVFxUF() { return VFxUF; }
4481
4484 }
4485
4486 void addVF(ElementCount VF) { VFs.insert(VF); }
4487
4489 assert(hasVF(VF) && "Cannot set VF not already in plan");
4490 VFs.clear();
4491 VFs.insert(VF);
4492 }
4493
4494 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4495 bool hasScalableVF() const {
4496 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4497 }
4498
4499 /// Returns an iterator range over all VFs of the plan.
4502 return VFs;
4503 }
4504
4505 bool hasScalarVFOnly() const {
4506 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4507 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4508 "Plan with scalar VF should only have a single VF");
4509 return HasScalarVFOnly;
4510 }
4511
4512 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4513
4514 unsigned getUF() const {
4515 assert(UFs.size() == 1 && "Expected a single UF");
4516 return UFs[0];
4517 }
4518
4519 void setUF(unsigned UF) {
4520 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4521 UFs.clear();
4522 UFs.insert(UF);
4523 }
4524
4525 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4526 /// concrete UF.
4527 bool isUnrolled() const { return UFs.size() == 1; }
4528
4529 /// Return a string with the name of the plan and the applicable VFs and UFs.
4530 std::string getName() const;
4531
4532 void setName(const Twine &newName) { Name = newName.str(); }
4533
4534 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4535 /// yet) for \p V.
4537 assert(V && "Trying to get or add the VPValue of a null Value");
4538 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4539 if (Inserted) {
4540 VPValue *VPV = new VPValue(V);
4541 VPLiveIns.push_back(VPV);
4542 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4543 It->second = VPV;
4544 }
4545
4546 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4547 return It->second;
4548 }
4549
4550 /// Return a VPValue wrapping i1 true.
4551 VPValue *getTrue() { return getConstantInt(1, 1); }
4552
4553 /// Return a VPValue wrapping i1 false.
4554 VPValue *getFalse() { return getConstantInt(1, 0); }
4555
4556 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4557 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4558 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4559 }
4560
4561 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4563 bool IsSigned = false) {
4564 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4565 }
4566
4567 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4569 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4570 }
4571
4572 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4573 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4574
4575 /// Return the list of live-in VPValues available in the VPlan.
4577 assert(all_of(Value2VPValue,
4578 [this](const auto &P) {
4579 return is_contained(VPLiveIns, P.second);
4580 }) &&
4581 "all VPValues in Value2VPValue must also be in VPLiveIns");
4582 return VPLiveIns;
4583 }
4584
4585#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4586 /// Print the live-ins of this VPlan to \p O.
4587 void printLiveIns(raw_ostream &O) const;
4588
4589 /// Print this VPlan to \p O.
4590 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4591
4592 /// Print this VPlan in DOT format to \p O.
4593 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4594
4595 /// Dump the plan to stderr (for debugging).
4596 LLVM_DUMP_METHOD void dump() const;
4597#endif
4598
4599 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4600 /// recipes to refer to the clones, and return it.
4602
4603 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4604 /// present. The returned block is owned by the VPlan and deleted once the
4605 /// VPlan is destroyed.
4607 VPRecipeBase *Recipe = nullptr) {
4608 auto *VPB = new VPBasicBlock(Name, Recipe);
4609 CreatedBlocks.push_back(VPB);
4610 return VPB;
4611 }
4612
4613 /// Create a new loop region with \p Name and entry and exiting blocks set
4614 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4615 /// owned by the VPlan and deleted once the VPlan is destroyed.
4616 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4617 VPBlockBase *Entry = nullptr,
4618 VPBlockBase *Exiting = nullptr) {
4619 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4620 : new VPRegionBlock(Name);
4621 CreatedBlocks.push_back(VPB);
4622 return VPB;
4623 }
4624
4625 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4626 /// returned block is owned by the VPlan and deleted once the VPlan is
4627 /// destroyed.
4629 const std::string &Name = "") {
4630 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4631 CreatedBlocks.push_back(VPB);
4632 return VPB;
4633 }
4634
4635 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4636 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4637 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4639
4640 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4641 /// instructions in \p IRBB, except its terminator which is managed by the
4642 /// successors of the block in VPlan. The returned block is owned by the VPlan
4643 /// and deleted once the VPlan is destroyed.
4645
4646 /// Returns true if the VPlan is based on a loop with an early exit. That is
4647 /// the case if the VPlan has either more than one exit block or a single exit
4648 /// block with multiple predecessors (one for the exit via the latch and one
4649 /// via the other early exit).
4650 bool hasEarlyExit() const {
4651 return count_if(ExitBlocks,
4652 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4653 1 ||
4654 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4655 }
4656
4657 /// Returns true if the scalar tail may execute after the vector loop. Note
4658 /// that this relies on unneeded branches to the scalar tail loop being
4659 /// removed.
4660 bool hasScalarTail() const {
4661 return !(!getScalarPreheader()->hasPredecessors() ||
4663 }
4664};
4665
4666#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4667inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4668 Plan.print(OS);
4669 return OS;
4670}
4671#endif
4672
4673} // end namespace llvm
4674
4675#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:337
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3618
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3612
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3971
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:3999
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4046
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4001
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3998
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4024
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3982
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3988
iterator end()
Definition VPlan.h:4008
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4006
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4000
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4059
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
~VPBasicBlock() override
Definition VPlan.h:3992
const_reverse_iterator rbegin() const
Definition VPlan.h:4012
reverse_iterator rend()
Definition VPlan.h:4013
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3986
VPRecipeBase & back()
Definition VPlan.h:4021
const VPRecipeBase & front() const
Definition VPlan.h:4018
const_iterator begin() const
Definition VPlan.h:4007
VPRecipeBase & front()
Definition VPlan.h:4019
const VPRecipeBase & back() const
Definition VPlan.h:4020
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4037
bool empty() const
Definition VPlan.h:4017
const_iterator end() const
Definition VPlan.h:4009
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4032
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4027
reverse_iterator rbegin()
Definition VPlan.h:4011
friend class VPlan
Definition VPlan.h:3972
size_t size() const
Definition VPlan.h:4016
const_reverse_iterator rend() const
Definition VPlan.h:4014
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2546
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2551
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2541
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2562
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2571
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2528
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2523
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2557
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2537
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as a successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the block's successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the block's predecessor list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3042
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3026
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3050
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3023
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3552
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3578
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3559
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3585
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3554
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3573
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3567
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3592
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:310
friend class VPValue
Definition VPlanValue.h:311
VPDef(const unsigned char SC)
Definition VPlanValue.h:389
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3760
VPValue * getStepValue() const
Definition VPlan.h:3771
Type * getScalarType() const
Definition VPlan.h:3766
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3748
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3740
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3774
VPValue * getStartValue() const
Definition VPlan.h:3770
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3732
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3668
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3649
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3655
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3661
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3644
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3527
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3532
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3518
const SCEV * getSCEV() const
Definition VPlan.h:3538
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3523
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3177
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3159
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3141
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3129
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3115
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3107
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3111
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3171
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3109
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2061
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2074
static bool classof(const VPValue *V)
Definition VPlan.h:2071
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2097
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2102
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2086
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2094
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2067
VPValue * getStartValue() const
Definition VPlan.h:2089
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2106
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2056
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1773
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1790
unsigned getOpcode() const
Definition VPlan.h:1786
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1767
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4124
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4148
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4138
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4125
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
FastMathFlagsTy FMFs
Definition VPlan.h:680
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:740
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:732
WrapFlagsTy WrapFlags
Definition VPlan.h:674
CmpInst::Predicate CmpPredicate
Definition VPlan.h:673
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:726
GEPNoWrapFlags GEPFlags
Definition VPlan.h:678
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:858
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:675
CmpInst::Predicate getPredicate() const
Definition VPlan.h:835
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:865
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:749
ExactFlagsTy ExactFlags
Definition VPlan.h:677
bool hasNoSignedWrap() const
Definition VPlan.h:884
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:895
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:735
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:738
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:743
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:723
bool isNonNeg() const
Definition VPlan.h:867
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:850
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:853
DisjointFlagsTy DisjointFlags
Definition VPlan.h:676
unsigned AllFlags
Definition VPlan.h:682
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:841
bool hasNoUnsignedWrap() const
Definition VPlan.h:873
FCmpFlagsTy FCmpFlags
Definition VPlan.h:681
NonNegFlagsTy NonNegFlags
Definition VPlan.h:679
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:759
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:795
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:746
VPIRFlags(Instruction &I)
Definition VPlan.h:688
Instruction & getInstruction() const
Definition VPlan.h:1444
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1452
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1431
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1458
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode; otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1446
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1419
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1018
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:990
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1002
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1258
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1299
static bool classof(const VPUser *R)
Definition VPlan.h:1284
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1266
Type * getResultType() const
Definition VPlan.h:1305
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1288
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1031
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1171
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1124
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1069
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1114
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1127
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1066
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1118
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1061
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1058
@ VScale
Returns the value for vscale.
Definition VPlan.h:1129
@ CanonicalIVIncrementForPart
Definition VPlan.h:1051
bool hasResult() const
Definition VPlan.h:1195
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1235
unsigned getOpcode() const
Definition VPlan.h:1179
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1238
friend class VPlanSlp
Definition VPlan.h:1032
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2657
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2663
static bool classof(const VPUser *U)
Definition VPlan.h:2639
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2606
Instruction * getInsertPos() const
Definition VPlan.h:2661
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2634
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2659
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2651
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2680
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2645
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2733
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2761
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2755
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2768
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2748
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2735
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2691
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2718
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2701
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2712
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2693
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1317
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1339
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1334
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4115
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1359
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1326
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1344
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1348
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3234
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3216
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3227
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3212
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4276
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2918
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2897
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2921
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2908
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2484
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2455
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2470
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2496
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2478
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2487
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2501
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2493
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2446
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2481
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2784
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2793
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2860
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2829
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2844
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2871
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2873
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2856
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2807
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2858
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2814
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2862
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2869
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2864
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2823
static bool classof(const VPUser *U)
Definition VPlan.h:2834
static bool classof(const VPValue *VPV)
Definition VPlan.h:2839
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2878
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4159
const VPBlockBase * getEntry() const
Definition VPlan.h:4195
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4270
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4227
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4212
VPBlockBase * getExiting()
Definition VPlan.h:4208
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4257
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4200
const Type * getCanonicalIVType() const
Definition VPlan.h:4271
const VPBlockBase * getExiting() const
Definition VPlan.h:4207
VPBlockBase * getEntry()
Definition VPlan.h:4196
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4265
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4220
friend class VPlan
Definition VPlan.h:4160
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4191
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2940
bool isSingleScalar() const
Definition VPlan.h:2981
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2948
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2993
bool isPredicated() const
Definition VPlan.h:2983
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2962
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2986
unsigned getOpcode() const
Definition VPlan.h:3010
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3005
VPValue * getStepValue() const
Definition VPlan.h:3837
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3831
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3802
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3823
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3814
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3795
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3840
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:595
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:598
static bool classof(const VPUser *U)
Definition VPlan.h:587
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:970
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:207
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1420
operand_range operands()
Definition VPlanValue.h:275
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:251
unsigned getNumOperands() const
Definition VPlanValue.h:245
operand_iterator op_end()
Definition VPlanValue.h:273
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:246
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:226
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:269
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:268
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
friend class VPExpressionRecipe
Definition VPlanValue.h:53
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:183
friend class VPDef
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:193
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:178
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1939
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1960
const VPValue * getVFValue() const
Definition VPlan.h:1935
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1953
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1946
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1924
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition VPlan.h:2013
Type * getSourceElementType() const
Definition VPlan.h:1990
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1992
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1999
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1980
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2016
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2006
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1707
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1714
const_operand_range args() const
Definition VPlan.h:1747
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1728
operand_range args()
Definition VPlan.h:1746
Function * getCalledScalarFunction() const
Definition VPlan.h:1742
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3704
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3691
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3686
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1557
Instruction::CastOps getOpcode() const
Definition VPlan.h:1593
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1596
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1565
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1578
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1887
Type * getSourceElementType() const
Definition VPlan.h:1892
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1895
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1879
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1865
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2183
static bool classof(const VPValue *V)
Definition VPlan.h:2137
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2153
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2168
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2161
PHINode * getPHINode() const
Definition VPlan.h:2163
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2125
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2149
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2166
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2175
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2132
const VPValue * getVFValue() const
Definition VPlan.h:2156
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2142
const VPValue * getStepValue() const
Definition VPlan.h:2150
const TruncInst * getTruncInst() const
Definition VPlan.h:2257
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2238
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2213
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2230
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2256
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2204
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2273
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2252
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2265
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1607
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1638
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1678
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1687
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1624
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1693
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1659
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1690
~VPWidenIntrinsicRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1681
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3265
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3262
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3305
static bool classof(const VPUser *U)
Definition VPlan.h:3299
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3328
Instruction & Ingredient
Definition VPlan.h:3253
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3288
Instruction & getIngredient() const
Definition VPlan.h:3336
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3259
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3292
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3319
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3256
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3315
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3275
void setMask(VPValue *Mask)
Definition VPlan.h:3267
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3325
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3312
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3309
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2367
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2338
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2345
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2300
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2309
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2290
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1509
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1529
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1519
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:1513
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1546
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4289
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1106
friend class VPSlotTracker
Definition VPlan.h:4291
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1082
bool hasVF(ElementCount VF) const
Definition VPlan.h:4494
LLVMContext & getContext() const
Definition VPlan.h:4482
VPBasicBlock * getEntry()
Definition VPlan.h:4382
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4473
void setName(const Twine &newName)
Definition VPlan.h:4532
bool hasScalableVF() const
Definition VPlan.h:4495
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4480
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4476
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4444
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4551
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4465
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4501
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:890
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:868
const VPValue & getVF() const
Definition VPlan.h:4477
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:898
const VPBasicBlock * getEntry() const
Definition VPlan.h:4383
friend class VPlanPrinter
Definition VPlan.h:4290
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4568
unsigned getUF() const
Definition VPlan.h:4514
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4628
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1220
bool hasUF(unsigned UF) const
Definition VPlan.h:4512
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4434
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4557
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4470
void setVF(ElementCount VF)
Definition VPlan.h:4488
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4527
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1011
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4650
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:993
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4420
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4451
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4458
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4407
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4371
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4606
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1226
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4554
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4536
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4616
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1112
bool hasScalarVFOnly() const
Definition VPlan.h:4505
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4425
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:905
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4576
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1065
void addVF(ElementCount VF)
Definition VPlan.h:4486
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4430
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4573
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4562
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1027
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4387
void setUF(unsigned UF)
Definition VPlan.h:4519
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4660
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1153
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4364
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2425
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3903
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2418
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1758
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
DenseMap< Value *, VPValue * > Value2VPValueTy
Definition VPlanValue.h:199
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2416
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3936
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3950
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3955
static bool isPossible(SrcTy R)
Definition VPlan.h:3937
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3865
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3886
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3867
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3870
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3857
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2410
Possible variants of a reduction.
Definition VPlan.h:2408
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2413
unsigned VFScaleFactor
Definition VPlan.h:2414
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2379
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition VPlan.h:2374
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2391
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1490
VPIRPhi(PHINode &PN)
Definition VPlan.h:1483
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1485
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1501
static bool classof(const VPUser *U)
Definition VPlan.h:1377
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1392
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1407
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1374
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1387
static bool classof(const VPValue *V)
Definition VPlan.h:1382
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:923
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:929
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:924
static bool classof(const VPValue *V)
Definition VPlan.h:949
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:956
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:944
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3383
void execute(VPTransformState &State) override
Generate the wide load or gather.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3396
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3384
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3406
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3342
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3364
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3343
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3352
A recipe for widening select instructions.
Definition VPlan.h:1806
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1817
VPWidenSelectRecipe(SelectInst *SI, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL={})
Definition VPlan.h:1807
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1838
VPValue * getCond() const
Definition VPlan.h:1833
unsigned getOpcode() const
Definition VPlan.h:1831
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3467
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3479
void execute(VPTransformState &State) override
Generate the wide store or scatter.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3492
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3468
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3482
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3424
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3442
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3433
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3448
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3425