LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the blocks predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the blocks successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPlan to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382/// recipes must inherit from VPSingleDef instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
// Helper macro that stamps out the classof() overloads a concrete recipe
// needs. Every overload reports true exactly when the recipe reached from its
// argument carries the given VPDefID; the VPValue and VPUser overloads report
// false when no recipe can be obtained from the argument.
#define VP_CLASSOF_IMPL(VPDefID) \
  static inline bool classof(const VPDef *D) { \
    return D->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPValue *V) { \
    if (auto *Def = V->getDefiningRecipe()) \
      return Def->getVPDefID() == VPDefID; \
    return false; \
  } \
  static inline bool classof(const VPUser *U) { \
    if (auto *Recipe = dyn_cast<VPRecipeBase>(U)) \
      return Recipe->getVPDefID() == VPDefID; \
    return false; \
  } \
  static inline bool classof(const VPRecipeBase *R) { \
    return R->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPSingleDefRecipe *R) { \
    return R->getVPDefID() == VPDefID; \
  }
527
528/// VPSingleDef is a base class for recipes for modeling a sequence of one or
529/// more output IR that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPBlendSC:
561 case VPRecipeBase::VPPredInstPHISC:
562 case VPRecipeBase::VPCanonicalIVPHISC:
563 case VPRecipeBase::VPActiveLaneMaskPHISC:
564 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
565 case VPRecipeBase::VPWidenPHISC:
566 case VPRecipeBase::VPWidenIntOrFpInductionSC:
567 case VPRecipeBase::VPWidenPointerInductionSC:
568 case VPRecipeBase::VPReductionPHISC:
569 return true;
570 case VPRecipeBase::VPBranchOnMaskSC:
571 case VPRecipeBase::VPInterleaveEVLSC:
572 case VPRecipeBase::VPInterleaveSC:
573 case VPRecipeBase::VPIRInstructionSC:
574 case VPRecipeBase::VPWidenLoadEVLSC:
575 case VPRecipeBase::VPWidenLoadSC:
576 case VPRecipeBase::VPWidenStoreEVLSC:
577 case VPRecipeBase::VPWidenStoreSC:
578 case VPRecipeBase::VPHistogramSC:
579 // TODO: Widened stores don't define a value, but widened loads do. Split
580 // the recipes to be able to make widened loads VPSingleDefRecipes.
581 return false;
582 }
583 llvm_unreachable("Unhandled VPDefID");
584 }
585
586 static inline bool classof(const VPUser *U) {
587 auto *R = dyn_cast<VPRecipeBase>(U);
588 return R && classof(R);
589 }
590
591 VPSingleDefRecipe *clone() override = 0;
592
593 /// Returns the underlying instruction.
600
601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
602 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
604#endif
605};
606
607/// Class to record and manage LLVM IR flags.
609 enum class OperationType : unsigned char {
610 Cmp,
611 FCmp,
612 OverflowingBinOp,
613 Trunc,
614 DisjointOp,
615 PossiblyExactOp,
616 GEPOp,
617 FPMathOp,
618 NonNegOp,
619 Other
620 };
621
622public:
623 struct WrapFlagsTy {
624 char HasNUW : 1;
625 char HasNSW : 1;
626
628 };
629
631 char HasNUW : 1;
632 char HasNSW : 1;
633
635 };
636
641
643 char NonNeg : 1;
644 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
645 };
646
647private:
648 struct ExactFlagsTy {
649 char IsExact : 1;
650 };
651 struct FastMathFlagsTy {
652 char AllowReassoc : 1;
653 char NoNaNs : 1;
654 char NoInfs : 1;
655 char NoSignedZeros : 1;
656 char AllowReciprocal : 1;
657 char AllowContract : 1;
658 char ApproxFunc : 1;
659
660 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
661 };
662 /// Holds both the predicate and fast-math flags for floating-point
663 /// comparisons.
664 struct FCmpFlagsTy {
666 FastMathFlagsTy FMFs;
667 };
668
669 OperationType OpType;
670
671 union {
676 ExactFlagsTy ExactFlags;
679 FastMathFlagsTy FMFs;
680 FCmpFlagsTy FCmpFlags;
681 unsigned AllFlags;
682 };
683
684public:
685 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
686
688 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
689 OpType = OperationType::FCmp;
690 FCmpFlags.Pred = FCmp->getPredicate();
691 FCmpFlags.FMFs = FCmp->getFastMathFlags();
692 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
693 OpType = OperationType::Cmp;
694 CmpPredicate = Op->getPredicate();
695 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
696 OpType = OperationType::DisjointOp;
697 DisjointFlags.IsDisjoint = Op->isDisjoint();
698 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
699 OpType = OperationType::OverflowingBinOp;
700 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
701 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
702 OpType = OperationType::Trunc;
703 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
704 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
705 OpType = OperationType::PossiblyExactOp;
706 ExactFlags.IsExact = Op->isExact();
707 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
708 OpType = OperationType::GEPOp;
709 GEPFlags = GEP->getNoWrapFlags();
710 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
711 OpType = OperationType::NonNegOp;
712 NonNegFlags.NonNeg = PNNI->hasNonNeg();
713 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
714 OpType = OperationType::FPMathOp;
715 FMFs = Op->getFastMathFlags();
716 } else {
717 OpType = OperationType::Other;
718 AllFlags = 0;
719 }
720 }
721
723 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
724
726 : OpType(OperationType::FCmp) {
727 FCmpFlags.Pred = Pred;
728 FCmpFlags.FMFs = FMFs;
729 }
730
732 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
733
735 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
736
737 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
738
740 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
741
743 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
744
746 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
747
749 OpType = Other.OpType;
750 AllFlags = Other.AllFlags;
751 }
752
753 /// Only keep flags also present in \p Other. \p Other must have the same
754 /// OpType as the current object.
755 void intersectFlags(const VPIRFlags &Other);
756
757 /// Drop all poison-generating flags.
759 // NOTE: This needs to be kept in-sync with
760 // Instruction::dropPoisonGeneratingFlags.
761 switch (OpType) {
762 case OperationType::OverflowingBinOp:
763 WrapFlags.HasNUW = false;
764 WrapFlags.HasNSW = false;
765 break;
766 case OperationType::Trunc:
767 TruncFlags.HasNUW = false;
768 TruncFlags.HasNSW = false;
769 break;
770 case OperationType::DisjointOp:
771 DisjointFlags.IsDisjoint = false;
772 break;
773 case OperationType::PossiblyExactOp:
774 ExactFlags.IsExact = false;
775 break;
776 case OperationType::GEPOp:
778 break;
779 case OperationType::FPMathOp:
780 case OperationType::FCmp:
781 getFMFsRef().NoNaNs = false;
782 getFMFsRef().NoInfs = false;
783 break;
784 case OperationType::NonNegOp:
785 NonNegFlags.NonNeg = false;
786 break;
787 case OperationType::Cmp:
788 case OperationType::Other:
789 break;
790 }
791 }
792
793 /// Apply the IR flags to \p I.
794 void applyFlags(Instruction &I) const {
795 switch (OpType) {
796 case OperationType::OverflowingBinOp:
797 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
798 I.setHasNoSignedWrap(WrapFlags.HasNSW);
799 break;
800 case OperationType::Trunc:
801 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
802 I.setHasNoSignedWrap(TruncFlags.HasNSW);
803 break;
804 case OperationType::DisjointOp:
805 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
806 break;
807 case OperationType::PossiblyExactOp:
808 I.setIsExact(ExactFlags.IsExact);
809 break;
810 case OperationType::GEPOp:
811 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
812 break;
813 case OperationType::FPMathOp:
814 case OperationType::FCmp: {
815 const FastMathFlagsTy &F = getFMFsRef();
816 I.setHasAllowReassoc(F.AllowReassoc);
817 I.setHasNoNaNs(F.NoNaNs);
818 I.setHasNoInfs(F.NoInfs);
819 I.setHasNoSignedZeros(F.NoSignedZeros);
820 I.setHasAllowReciprocal(F.AllowReciprocal);
821 I.setHasAllowContract(F.AllowContract);
822 I.setHasApproxFunc(F.ApproxFunc);
823 break;
824 }
825 case OperationType::NonNegOp:
826 I.setNonNeg(NonNegFlags.NonNeg);
827 break;
828 case OperationType::Cmp:
829 case OperationType::Other:
830 break;
831 }
832 }
833
835 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
836 "recipe doesn't have a compare predicate");
837 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
838 }
839
841 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
842 "recipe doesn't have a compare predicate");
843 if (OpType == OperationType::FCmp)
844 FCmpFlags.Pred = Pred;
845 else
846 CmpPredicate = Pred;
847 }
848
850
851 /// Returns true if the recipe has a comparison predicate.
852 bool hasPredicate() const {
853 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
854 }
855
856 /// Returns true if the recipe has fast-math flags.
857 bool hasFastMathFlags() const {
858 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
859 }
860
862
863 /// Returns true if the recipe has non-negative flag.
864 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
865
866 bool isNonNeg() const {
867 assert(OpType == OperationType::NonNegOp &&
868 "recipe doesn't have a NNEG flag");
869 return NonNegFlags.NonNeg;
870 }
871
872 bool hasNoUnsignedWrap() const {
873 switch (OpType) {
874 case OperationType::OverflowingBinOp:
875 return WrapFlags.HasNUW;
876 case OperationType::Trunc:
877 return TruncFlags.HasNUW;
878 default:
879 llvm_unreachable("recipe doesn't have a NUW flag");
880 }
881 }
882
883 bool hasNoSignedWrap() const {
884 switch (OpType) {
885 case OperationType::OverflowingBinOp:
886 return WrapFlags.HasNSW;
887 case OperationType::Trunc:
888 return TruncFlags.HasNSW;
889 default:
890 llvm_unreachable("recipe doesn't have a NSW flag");
891 }
892 }
893
894 bool isDisjoint() const {
895 assert(OpType == OperationType::DisjointOp &&
896 "recipe cannot have a disjoing flag");
897 return DisjointFlags.IsDisjoint;
898 }
899
900private:
901 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
902 FastMathFlagsTy &getFMFsRef() {
903 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
904 }
905 const FastMathFlagsTy &getFMFsRef() const {
906 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
907 }
908
909public:
910#if !defined(NDEBUG)
911 /// Returns true if the set flags are valid for \p Opcode.
912 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
913#endif
914
915#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
916 void printFlags(raw_ostream &O) const;
917#endif
918};
919
920/// A pure-virtual common base class for recipes defining a single VPValue and
921/// using IR flags.
923 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
924 const VPIRFlags &Flags,
926 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
927
928 static inline bool classof(const VPRecipeBase *R) {
929 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
930 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
935 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
936 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
937 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
938 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
939 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
940 }
941
942 static inline bool classof(const VPUser *U) {
943 auto *R = dyn_cast<VPRecipeBase>(U);
944 return R && classof(R);
945 }
946
947 static inline bool classof(const VPValue *V) {
948 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
949 return R && classof(R);
950 }
951
952 VPRecipeWithIRFlags *clone() override = 0;
953
954 static inline bool classof(const VPSingleDefRecipe *U) {
955 auto *R = dyn_cast<VPRecipeBase>(U);
956 return R && classof(R);
957 }
958
959 void execute(VPTransformState &State) override = 0;
960
961 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
963 VPCostContext &Ctx) const;
964};
965
966/// Helper to access the operand that contains the unroll part for this recipe
967/// after unrolling.
968template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
969protected:
970 /// Return the VPValue operand containing the unroll part or null if there is
971 /// no such operand.
972 VPValue *getUnrollPartOperand(const VPUser &U) const;
973
974 /// Return the unroll part.
975 unsigned getUnrollPart(const VPUser &U) const;
976};
977
978/// Helper to manage IR metadata for recipes. It filters out metadata that
979/// cannot be propagated.
982
983public:
984 VPIRMetadata() = default;
985
986 /// Adds metadata that can be preserved from the original instruction
987 /// \p I.
989
990 /// Copy constructor for cloning.
991 VPIRMetadata(const VPIRMetadata &Other) = default;
992
994
995 /// Add all metadata to \p I.
996 void applyMetadata(Instruction &I) const;
997
998 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
999 /// already exists, it will be replaced. Otherwise, it will be added.
1000 void setMetadata(unsigned Kind, MDNode *Node) {
// Search the (kind, node) pairs for an existing entry with kind \p Kind.
1001 auto It =
1002 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1003 return P.first == Kind;
1004 });
// Overwrite the node of the existing entry, or append a new pair.
1005 if (It != Metadata.end())
1006 It->second = Node;
1007 else
1008 Metadata.emplace_back(Kind, Node);
1009 }
1010
1011 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1012 /// nodes that are common to both.
1013 void intersect(const VPIRMetadata &MD);
1014
1015 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1016 MDNode *getMetadata(unsigned Kind) const {
// Find the first (kind, node) pair whose kind equals \p Kind.
1017 auto It =
1018 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1019 return It != Metadata.end() ? It->second : nullptr;
1020 }
1021
1022#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1023 /// Print metadata with node IDs.
1024 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1025#endif
1026};
1027
1028/// This is a concrete Recipe that models a single VPlan-level instruction.
1029/// While as any Recipe it may generate a sequence of IR instructions when
1030/// executed, these instructions would always form a single-def expression as
1031/// the VPInstruction is also a single def-use vertex.
1033 public VPIRMetadata,
1034 public VPUnrollPartAccessor<1> {
1035 friend class VPlanSlp;
1036
1037public:
1038 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1039 enum {
1041 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1042 // values of a first-order recurrence.
1046 // Creates a mask where each lane is active (true) whilst the current
1047 // counter (first operand + index) is less than the second operand. i.e.
1048 // mask[i] = icmp ult (op0 + i), op1
1049 // The size of the mask returned is VF * Multiplier (UF, third op).
1053 // Increment the canonical IV separately for each unrolled part.
1055 // Abstract instruction that compares two values and branches. This is
1056 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1059 // Branch with 2 boolean condition operands and 3 successors. If condition
1060 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1061 // successor 1; otherwise branches to successor 2. Expanded after region
1062 // dissolution into: (1) an OR of the two conditions branching to
1063 // middle.split or successor 2, and (2) middle.split branching to successor
1064 // 0 or successor 1 based on condition 0.
1067 /// Given operands of (the same) struct type, creates a struct of fixed-
1068 /// width vectors each containing a struct field of all operands. The
1069 /// number of operands matches the element count of every vector.
1071 /// Creates a fixed-width vector containing all operands. The number of
1072 /// operands matches the vector element count.
1074 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1075 /// abstract VPInstruction whose single defined VPValue represents VF
1076 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1077 /// VPInstructions.
1079 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1080 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1084 // Extracts the last part of its operand. Removed during unrolling.
1086 // Extracts the last lane of its vector operand, per part.
1088 // Extracts the second-to-last lane from its operand or the second-to-last
1089 // part if it is scalar. In the latter case, the recipe will be removed
1090 // during unrolling.
1092 LogicalAnd, // Non-poison propagating logical And.
1093 // Add an offset in bytes (second operand) to a base pointer (first
1094 // operand). Only generates scalar values (either for the first lane only or
1095 // for all lanes, depending on its uses).
1097 // Add a vector offset in bytes (second operand) to a scalar base pointer
1098 // (first operand).
1100 // Returns a scalar boolean value, which is true if any lane of its
1101 // (boolean) vector operands is true. It produces the reduced value across
1102 // all unrolled iterations. Unrolling will add all copies of its original
1103 // operand as additional operands. AnyOf is poison-safe as all operands
1104 // will be frozen.
1106 // Calculates the first active lane index of the vector predicate operands.
1107 // It produces the lane index across all unrolled iterations. Unrolling will
1108 // add all copies of its original operand as additional operands.
1109 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1110 // result even with operands that are all zeroes.
1112 // Calculates the last active lane index of the vector predicate operands.
1113 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1114 // tail-folding to extract the correct live-out value from the last active
1115 // iteration. It produces the lane index across all unrolled iterations.
1116 // Unrolling will add all copies of its original operand as additional
1117 // operands.
1119 // Returns a reversed vector for the operand.
1121
1122 // The opcodes below are used for VPInstructionWithType.
1123 //
1124 /// Scale the first operand (vector step) by the second operand
1125 /// (scalar-step). Casts both operands to the result type if needed.
1127 /// Start vector for reductions with 3 operands: the original start value,
1128 /// the identity value for the reduction and an integer indicating the
1129 /// scaling factor.
1131 // Creates a step vector starting from 0 to VF with a step of 1.
1133 /// Extracts a single lane (first operand) from a set of vector operands.
1134 /// The lane specifies an index into a vector formed by combining all vector
1135 /// operands (all operands after the first one).
1137 /// Explicit user for the resume phi of the canonical induction in the main
1138 /// VPlan, used by the epilogue vector loop.
1140 /// Returns the value for vscale.
1143 };
1144
1145 /// Returns true if this VPInstruction generates scalar values for all lanes.
1146 /// Most VPInstructions generate a single value per part, either vector or
1147 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1148 /// values per all lanes, stemming from an original ingredient. This method
1149 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1150 /// underlying ingredient.
1151 bool doesGeneratePerAllLanes() const;
1152
1153private:
1154 typedef unsigned char OpcodeTy;
1155 OpcodeTy Opcode;
1156
1157 /// An optional name that can be used for the generated IR instruction.
1158 std::string Name;
1159
1160 /// Returns true if we can generate a scalar for the first lane only if
1161 /// needed.
1162 bool canGenerateScalarForFirstLane() const;
1163
1164 /// Utility methods serving execute(): generates a single vector instance of
1165 /// the modeled instruction. \returns the generated value. In some cases an
1166 /// existing value is returned rather than a generated one.
1167 Value *generate(VPTransformState &State);
1168
1169#if !defined(NDEBUG)
1170 /// Return the number of operands determined by the opcode of the
1171 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1172 /// directly by the opcode.
1173 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1174#endif
1175
1176public:
1177 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1178 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1179 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1180
1181 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1182
/// Create a new VPInstruction with the same opcode, operands, debug location
/// and name. *this is passed for both the flags and the metadata parameters
/// of the constructor. The underlying value is set on the copy only if this
/// recipe has one.
1183 VPInstruction *clone() override {
1184 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1185 getDebugLoc(), Name);
1186 if (getUnderlyingValue())
1187 New->setUnderlyingValue(getUnderlyingInstr());
1188 return New;
1189 }
1190
1191 unsigned getOpcode() const { return Opcode; }
1192
1193 /// Generate the instruction.
1194 /// TODO: We currently execute only per-part unless a specific instance is
1195 /// provided.
1196 void execute(VPTransformState &State) override;
1197
1198 /// Return the cost of this VPInstruction.
1199 InstructionCost computeCost(ElementCount VF,
1200 VPCostContext &Ctx) const override;
1201
1202#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1203 /// Print the VPInstruction to dbgs() (for debugging).
1204 LLVM_DUMP_METHOD void dump() const;
1205#endif
1206
/// Returns false for the opcodes listed in the switch below and true for
/// every other opcode.
1207 bool hasResult() const {
1208 // CallInst may or may not have a result, depending on the called function.
1209 // Conservatively report that calls have results for now.
1210 switch (getOpcode()) {
1211 case Instruction::Ret:
1212 case Instruction::Br:
1213 case Instruction::Store:
1214 case Instruction::Switch:
1215 case Instruction::IndirectBr:
1216 case Instruction::Resume:
1217 case Instruction::CatchRet:
1218 case Instruction::Unreachable:
1219 case Instruction::Fence:
1220 case Instruction::AtomicRMW:
1224 return false;
1225 default:
1226 return true;
1227 }
1228 }
1229
1230 /// Returns true if the underlying opcode may read from or write to memory.
1231 bool opcodeMayReadOrWriteFromMemory() const;
1232
1233 /// Returns true if the recipe only uses the first lane of operand \p Op.
1234 bool usesFirstLaneOnly(const VPValue *Op) const override;
1235
1236 /// Returns true if the recipe only uses the first part of operand \p Op.
1237 bool usesFirstPartOnly(const VPValue *Op) const override;
1238
1239 /// Returns true if this VPInstruction produces a scalar value from a vector,
1240 /// e.g. by performing a reduction or extracting a lane.
1241 bool isVectorToScalar() const;
1242
1243 /// Returns true if this VPInstruction's operands are single scalars and the
1244 /// result is also a single scalar.
1245 bool isSingleScalar() const;
1246
1247 /// Returns the symbolic name assigned to the VPInstruction.
1248 StringRef getName() const { return Name; }
1249
1250 /// Set the symbolic name for the VPInstruction.
1251 void setName(StringRef NewName) { Name = NewName.str(); }
1252
1253protected:
1254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1255 /// Print the VPInstruction to \p O.
1256 void printRecipe(raw_ostream &O, const Twine &Indent,
1257 VPSlotTracker &SlotTracker) const override;
1258#endif
1259};
1260
1261/// A specialization of VPInstruction augmenting it with a dedicated result
1262/// type, to be used when the opcode and operands of the VPInstruction don't
1263/// directly determine the result type. Note that there is no separate VPDef ID
1264/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1265/// distinguished purely by the opcode.
1267 /// Scalar result type produced by the recipe.
1268 Type *ResultTy;
1269
1270public:
1272 Type *ResultTy, const VPIRFlags &Flags = {},
1273 const VPIRMetadata &Metadata = {},
1275 const Twine &Name = "")
1276 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1277 ResultTy(ResultTy) {}
1278
1279 static inline bool classof(const VPRecipeBase *R) {
1280 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1281 // type information.
1282 if (R->isScalarCast())
1283 return true;
1284 auto *VPI = dyn_cast<VPInstruction>(R);
1285 if (!VPI)
1286 return false;
1287 switch (VPI->getOpcode()) {
1291 return true;
1292 default:
1293 return false;
1294 }
1295 }
1296
1297 static inline bool classof(const VPUser *R) {
1299 }
1300
1301 VPInstruction *clone() override {
1302 auto *New =
1304 *this, *this, getDebugLoc(), getName());
1305 New->setUnderlyingValue(getUnderlyingValue());
1306 return New;
1307 }
1308
1309 void execute(VPTransformState &State) override;
1310
1311 /// Return the cost of this VPInstruction.
1313 VPCostContext &Ctx) const override {
1314 // TODO: Compute accurate cost after retiring the legacy cost model.
1315 return 0;
1316 }
1317
/// Returns the result type this recipe was constructed with.
1318 Type *getResultType() const { return ResultTy; }
1319
1320protected:
1321#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1322 /// Print the recipe.
1323 void printRecipe(raw_ostream &O, const Twine &Indent,
1324 VPSlotTracker &SlotTracker) const override;
1325#endif
1326};
1327
1328/// Helper type to provide functions to access incoming values and blocks for
1329/// phi-like recipes.
1331protected:
1332 /// Return a VPRecipeBase* to the current object.
1333 virtual const VPRecipeBase *getAsRecipe() const = 0;
1334
1335public:
1336 virtual ~VPPhiAccessors() = default;
1337
1338 /// Returns the incoming VPValue with index \p Idx.
1339 VPValue *getIncomingValue(unsigned Idx) const {
1340 return getAsRecipe()->getOperand(Idx);
1341 }
1342
1343 /// Returns the incoming block with index \p Idx.
1344 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1345
1346 /// Returns the number of incoming values, also number of incoming blocks.
1347 virtual unsigned getNumIncoming() const {
1348 return getAsRecipe()->getNumOperands();
1349 }
1350
1351 /// Returns an iterator range over the incoming values.
1353 return make_range(getAsRecipe()->op_begin(),
1354 getAsRecipe()->op_begin() + getNumIncoming());
1355 }
1356
1358 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1359
1360 /// Returns an iterator range over the incoming blocks.
1362 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1363 return getIncomingBlock(Idx);
1364 };
1365 return map_range(index_range(0, getNumIncoming()), GetBlock);
1366 }
1367
1368 /// Returns an iterator range over pairs of incoming values and corresponding
1369 /// incoming blocks.
1375
1376 /// Removes the incoming value for \p IncomingBlock, which must be a
1377 /// predecessor.
1378 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1379
1380#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1381 /// Print the recipe.
1383#endif
1384};
1385
1387 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1388 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1389
/// Returns true if \p U is a VPInstruction with opcode Instruction::PHI.
1390 static inline bool classof(const VPUser *U) {
1391 auto *VPI = dyn_cast<VPInstruction>(U);
1392 return VPI && VPI->getOpcode() == Instruction::PHI;
1393 }
1394
/// Returns true if \p V is a VPInstruction with opcode Instruction::PHI.
1395 static inline bool classof(const VPValue *V) {
1396 auto *VPI = dyn_cast<VPInstruction>(V);
1397 return VPI && VPI->getOpcode() == Instruction::PHI;
1398 }
1399
/// Returns true if \p SDR is a VPInstruction with opcode Instruction::PHI.
1400 static inline bool classof(const VPSingleDefRecipe *SDR) {
1401 auto *VPI = dyn_cast<VPInstruction>(SDR);
1402 return VPI && VPI->getOpcode() == Instruction::PHI;
1403 }
1404
/// Create a new VPPhi with the same operands, debug location and name, and
/// copy the underlying value to it. Flags and metadata are not forwarded:
/// the VPPhi constructor passes empty ones to VPInstruction.
1405 VPPhi *clone() override {
1406 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1407 PhiR->setUnderlyingValue(getUnderlyingValue());
1408 return PhiR;
1409 }
1410
1411 void execute(VPTransformState &State) override;
1412
1413protected:
1414#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1415 /// Print the recipe.
1416 void printRecipe(raw_ostream &O, const Twine &Indent,
1417 VPSlotTracker &SlotTracker) const override;
1418#endif
1419
1420 const VPRecipeBase *getAsRecipe() const override { return this; }
1421};
1422
1423 /// A recipe to wrap an original IR instruction not to be modified during
1424 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1425 /// Except for PHIs, VPIRInstructions cannot have any operands.
1427 Instruction &I;
1428
1429protected:
1430 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1431 /// subclasses may need to be created, e.g. VPIRPhi.
1433 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1434
1435public:
1436 ~VPIRInstruction() override = default;
1437
1438 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1439 /// VPIRInstruction.
1441
1442 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1443
1445 auto *R = create(I);
1446 for (auto *Op : operands())
1447 R->addOperand(Op);
1448 return R;
1449 }
1450
1451 void execute(VPTransformState &State) override;
1452
1453 /// Return the cost of this VPIRInstruction.
1455 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1456
/// Returns the IR instruction wrapped by this recipe.
1457 Instruction &getInstruction() const { return I; }
1458
1459 bool usesScalars(const VPValue *Op) const override {
1461 "Op must be an operand of the recipe");
1462 return true;
1463 }
1464
1465 bool usesFirstPartOnly(const VPValue *Op) const override {
1467 "Op must be an operand of the recipe");
1468 return true;
1469 }
1470
1471 bool usesFirstLaneOnly(const VPValue *Op) const override {
1473 "Op must be an operand of the recipe");
1474 return true;
1475 }
1476
1477 /// Update the recipe's first operand to the last lane of the last part of the
1478 /// operand using \p Builder. Must only be used for VPIRInstructions with at
1479 /// least one operand wrapping a PHINode.
1481
1482protected:
1483#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1484 /// Print the recipe.
1485 void printRecipe(raw_ostream &O, const Twine &Indent,
1486 VPSlotTracker &SlotTracker) const override;
1487#endif
1488};
1489
1490 /// An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use
1491 /// of cast/dyn_cast/isa and an execute() implementation. A single VPValue
1492 /// operand is allowed, and it is used to add a new incoming value for the
1493 /// single predecessor VPBB.
1495 public VPPhiAccessors {
1497
/// Returns true if \p U is a VPIRInstruction whose wrapped IR instruction is
/// a PHINode.
1498 static inline bool classof(const VPRecipeBase *U) {
1499 auto *R = dyn_cast<VPIRInstruction>(U);
1500 return R && isa<PHINode>(R->getInstruction());
1501 }
1502
1504
1505 void execute(VPTransformState &State) override;
1506
1507protected:
1508#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1509 /// Print the recipe.
1510 void printRecipe(raw_ostream &O, const Twine &Indent,
1511 VPSlotTracker &SlotTracker) const override;
1512#endif
1513
1514 const VPRecipeBase *getAsRecipe() const override { return this; }
1515};
1516
1517/// VPWidenRecipe is a recipe for producing a widened instruction using the
1518/// opcode and operands of the recipe. This recipe covers most of the
1519/// traditional vectorization cases where each recipe transforms into a
1520/// vectorized version of itself.
1522 public VPIRMetadata {
1523 unsigned Opcode;
1524
1525public:
1527 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1528 DebugLoc DL = {})
1529 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1530 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1531 setUnderlyingValue(&I);
1532 }
1533
1534 ~VPWidenRecipe() override = default;
1535
/// Create a new VPWidenRecipe for the same underlying instruction and
/// operands. *this is passed for both the flags and the metadata parameters
/// of the constructor, followed by this recipe's debug location.
1536 VPWidenRecipe *clone() override {
1537 return new VPWidenRecipe(*getUnderlyingInstr(), operands(), *this, *this,
1538 getDebugLoc());
1539 }
1540
1541 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1542
1543 /// Produce a widened instruction using the opcode and operands of the recipe,
1544 /// processing State.VF elements.
1545 void execute(VPTransformState &State) override;
1546
1547 /// Return the cost of this VPWidenRecipe.
1548 InstructionCost computeCost(ElementCount VF,
1549 VPCostContext &Ctx) const override;
1550
/// Returns the opcode recorded from the underlying instruction at
/// construction.
1551 unsigned getOpcode() const { return Opcode; }
1552
1553protected:
1554#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1555 /// Print the recipe.
1556 void printRecipe(raw_ostream &O, const Twine &Indent,
1557 VPSlotTracker &SlotTracker) const override;
1558#endif
1559
1560 /// Returns true if the recipe only uses the first lane of operand \p Op.
1561 bool usesFirstLaneOnly(const VPValue *Op) const override {
1563 "Op must be an operand of the recipe");
1564 return Opcode == Instruction::Select && Op == getOperand(0) &&
1565 Op->isDefinedOutsideLoopRegions();
1566 }
1567};
1568
1569/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1571 /// Cast instruction opcode.
1572 Instruction::CastOps Opcode;
1573
1574 /// Result type for the cast.
1575 Type *ResultTy;
1576
1577public:
1579 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1580 const VPIRMetadata &Metadata = {},
1582 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1583 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1584 assert(flagsValidForOpcode(Opcode) &&
1585 "Set flags not supported for the provided opcode");
1587 }
1588
1589 ~VPWidenCastRecipe() override = default;
1590
1592 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1594 *this, *this, getDebugLoc());
1595 }
1596
1597 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1598
1599 /// Produce widened copies of the cast.
1600 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1601
1602 /// Return the cost of this VPWidenCastRecipe.
1604 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1605
1606 Instruction::CastOps getOpcode() const { return Opcode; }
1607
1608 /// Returns the result type of the cast.
1609 Type *getResultType() const { return ResultTy; }
1610
1611protected:
1612#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1613 /// Print the recipe.
1614 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1615 VPSlotTracker &SlotTracker) const override;
1616#endif
1617};
1618
1619/// A recipe for widening vector intrinsics.
1621 /// ID of the vector intrinsic to widen.
1622 Intrinsic::ID VectorIntrinsicID;
1623
1624 /// Scalar return type of the intrinsic.
1625 Type *ResultTy;
1626
1627 /// True if the intrinsic may read from memory.
1628 bool MayReadFromMemory;
1629
1630 /// True if the intrinsic may write to memory.
1631 bool MayWriteToMemory;
1632
1633 /// True if the intrinsic may have side-effects.
1634 bool MayHaveSideEffects;
1635
1636public:
1638 ArrayRef<VPValue *> CallArguments, Type *Ty,
1639 const VPIRFlags &Flags = {},
1640 const VPIRMetadata &MD = {},
1642 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1643 DL),
1644 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1645 MayReadFromMemory(CI.mayReadFromMemory()),
1646 MayWriteToMemory(CI.mayWriteToMemory()),
1647 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1648 setUnderlyingValue(&CI);
1649 }
1650
1652 ArrayRef<VPValue *> CallArguments, Type *Ty,
1653 const VPIRFlags &Flags = {},
1654 const VPIRMetadata &Metadata = {},
1656 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1657 DL),
1658 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1659 ResultTy(Ty) {
1660 LLVMContext &Ctx = Ty->getContext();
1661 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1662 MemoryEffects ME = Attrs.getMemoryEffects();
1663 MayReadFromMemory = !ME.onlyWritesMemory();
1664 MayWriteToMemory = !ME.onlyReadsMemory();
1665 MayHaveSideEffects = MayWriteToMemory ||
1666 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1667 !Attrs.hasAttribute(Attribute::WillReturn);
1668 }
1669
1670 ~VPWidenIntrinsicRecipe() override = default;
1671
1673 if (Value *CI = getUnderlyingValue())
1674 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1675 operands(), ResultTy, *this, *this,
1676 getDebugLoc());
1677 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1678 *this, *this, getDebugLoc());
1679 }
1680
1681 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1682
1683 /// Produce a widened version of the vector intrinsic.
1684 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1685
1686 /// Return the cost of this vector intrinsic.
1688 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1689
1690 /// Return the ID of the intrinsic.
1691 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1692
1693 /// Return the scalar return type of the intrinsic.
1694 Type *getResultType() const { return ResultTy; }
1695
1696 /// Return the name of the intrinsic as a string.
1698
1699 /// Returns true if the intrinsic may read from memory.
1700 bool mayReadFromMemory() const { return MayReadFromMemory; }
1701
1702 /// Returns true if the intrinsic may write to memory.
1703 bool mayWriteToMemory() const { return MayWriteToMemory; }
1704
1705 /// Returns true if the intrinsic may have side-effects.
1706 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1707
1708 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1709
1710protected:
1711#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1712 /// Print the recipe.
1713 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1714 VPSlotTracker &SlotTracker) const override;
1715#endif
1716};
1717
1718/// A recipe for widening Call instructions using library calls.
1720 public VPIRMetadata {
1721 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1722 /// between a given VF and the chosen vectorized variant, so there will be a
1723 /// different VPlan for each VF with a valid variant.
1724 Function *Variant;
1725
1726public:
1728 ArrayRef<VPValue *> CallArguments,
1729 const VPIRFlags &Flags = {},
1730 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1731 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1732 VPIRMetadata(Metadata), Variant(Variant) {
1733 setUnderlyingValue(UV);
1734 assert(
1735 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1736 "last operand must be the called function");
1737 }
1738
1739 ~VPWidenCallRecipe() override = default;
1740
1742 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1743 *this, *this, getDebugLoc());
1744 }
1745
1746 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1747
1748 /// Produce a widened version of the call instruction.
1749 void execute(VPTransformState &State) override;
1750
1751 /// Return the cost of this VPWidenCallRecipe.
1752 InstructionCost computeCost(ElementCount VF,
1753 VPCostContext &Ctx) const override;
1754
1758
1761
1762protected:
1763#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1764 /// Print the recipe.
1765 void printRecipe(raw_ostream &O, const Twine &Indent,
1766 VPSlotTracker &SlotTracker) const override;
1767#endif
1768};
1769
1770/// A recipe representing a sequence of load -> update -> store as part of
1771/// a histogram operation. This means there may be aliasing between vector
1772/// lanes, which is handled by the llvm.experimental.vector.histogram family
1773/// of intrinsics. The only update operations currently supported are
1774/// 'add' and 'sub' where the other term is loop-invariant.
1776 /// Opcode of the update operation, currently either add or sub.
1777 unsigned Opcode;
1778
1779public:
1780 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1782 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1783
1784 ~VPHistogramRecipe() override = default;
1785
1787 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1788 }
1789
1790 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1791
1792 /// Produce a vectorized histogram operation.
1793 void execute(VPTransformState &State) override;
1794
1795 /// Return the cost of this VPHistogramRecipe.
1797 VPCostContext &Ctx) const override;
1798
/// Returns the opcode of the update operation.
1799 unsigned getOpcode() const { return Opcode; }
1800
1801 /// Return the mask operand if one was provided, or a null pointer if all
1802 /// lanes should be executed unconditionally.
1803 VPValue *getMask() const {
1804 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1805 }
1806
1807protected:
1808#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1809 /// Print the recipe
1810 void printRecipe(raw_ostream &O, const Twine &Indent,
1811 VPSlotTracker &SlotTracker) const override;
1812#endif
1813};
1814
1815/// A recipe for handling GEP instructions.
1817 Type *SourceElementTy;
1818
/// Returns true if operand 0 (treated as the pointer operand) is defined
/// outside loop regions.
1819 bool isPointerLoopInvariant() const {
1820 return getOperand(0)->isDefinedOutsideLoopRegions();
1821 }
1822
/// Returns true if the index with number \p I is defined outside loop
/// regions. Index \p I is operand \p I + 1, since operand 0 is skipped.
1823 bool isIndexLoopInvariant(unsigned I) const {
1824 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1825 }
1826
1827public:
1829 const VPIRFlags &Flags = {},
1831 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1832 SourceElementTy(GEP->getSourceElementType()) {
1833 setUnderlyingValue(GEP);
1835 (void)Metadata;
1837 assert(Metadata.empty() && "unexpected metadata on GEP");
1838 }
1839
1840 ~VPWidenGEPRecipe() override = default;
1841
1844 operands(), *this, getDebugLoc());
1845 }
1846
1847 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1848
1849 /// This recipe generates a GEP instruction.
1850 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1851
1852 /// Generate the gep nodes.
1853 void execute(VPTransformState &State) override;
1854
/// Returns the source element type, which was taken from the GEP passed to
/// the constructor.
1855 Type *getSourceElementType() const { return SourceElementTy; }
1856
1857 /// Return the cost of this VPWidenGEPRecipe.
1859 VPCostContext &Ctx) const override {
1860 // TODO: Compute accurate cost after retiring the legacy cost model.
1861 return 0;
1862 }
1863
1864 /// Returns true if the recipe only uses the first lane of operand \p Op.
1865 bool usesFirstLaneOnly(const VPValue *Op) const override;
1866
1867protected:
1868#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1869 /// Print the recipe.
1870 void printRecipe(raw_ostream &O, const Twine &Indent,
1871 VPSlotTracker &SlotTracker) const override;
1872#endif
1873};
1874
1875/// A recipe to compute a pointer to the last element of each part of a widened
1876/// memory access for widened memory accesses of IndexedTy. Used for
1877/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1879 public VPUnrollPartAccessor<2> {
1880 Type *IndexedTy;
1881
1882 /// The constant stride of the pointer computed by this recipe, expressed in
1883 /// units of IndexedTy.
1884 int64_t Stride;
1885
1886public:
1888 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1889 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1890 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1891 IndexedTy(IndexedTy), Stride(Stride) {
1892 assert(Stride < 0 && "Stride must be negative");
1893 }
1894
1895 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1896
/// Returns the VF operand, which is operand 1 of the recipe.
1898 const VPValue *getVFValue() const { return getOperand(1); }
1899
1900 void execute(VPTransformState &State) override;
1901
1902 bool usesFirstLaneOnly(const VPValue *Op) const override {
1904 "Op must be an operand of the recipe");
1905 return true;
1906 }
1907
1908 /// Return the cost of this VPVectorEndPointerRecipe.
1910 VPCostContext &Ctx) const override {
1911 // TODO: Compute accurate cost after retiring the legacy cost model.
1912 return 0;
1913 }
1914
1915 /// Returns true if the recipe only uses the first part of operand \p Op.
1916 bool usesFirstPartOnly(const VPValue *Op) const override {
1918 "Op must be an operand of the recipe");
1919 assert(getNumOperands() <= 2 && "must have at most two operands");
1920 return true;
1921 }
1922
1924 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1925 Stride, getGEPNoWrapFlags(),
1926 getDebugLoc());
1927 }
1928
1929protected:
1930#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1931 /// Print the recipe.
1932 void printRecipe(raw_ostream &O, const Twine &Indent,
1933 VPSlotTracker &SlotTracker) const override;
1934#endif
1935};
1936
1937/// A recipe to compute the pointers for widened memory accesses of \p
1938/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
1939/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
1941 Type *SourceElementTy;
1942
1943public:
1944 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1946 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, Ptr, GEPFlags, DL),
1947 SourceElementTy(SourceElementTy) {}
1948
1949 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1950
1952 return getNumOperands() == 2 ? getOperand(1) : nullptr;
1953 }
1954
1955 void execute(VPTransformState &State) override;
1956
1957 Type *getSourceElementType() const { return SourceElementTy; }
1958
1959 bool usesFirstLaneOnly(const VPValue *Op) const override {
1961 "Op must be an operand of the recipe");
1962 return true;
1963 }
1964
1965 /// Returns true if the recipe only uses the first part of operand \p Op.
1966 bool usesFirstPartOnly(const VPValue *Op) const override {
1968 "Op must be an operand of the recipe");
1969 assert(getNumOperands() <= 2 && "must have at most two operands");
1970 return true;
1971 }
1972
1974 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
1976 if (auto *Off = getOffset())
1977 Clone->addOperand(Off);
1978 return Clone;
1979 }
1980
1981 /// Return the cost of this VPVectorPointerRecipe.
1983 VPCostContext &Ctx) const override {
1984 // TODO: Compute accurate cost after retiring the legacy cost model.
1985 return 0;
1986 }
1987
1988protected:
1989#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1990 /// Print the recipe.
1991 void printRecipe(raw_ostream &O, const Twine &Indent,
1992 VPSlotTracker &SlotTracker) const override;
1993#endif
1994};
1995
1996/// A pure virtual base class for all recipes modeling header phis, including
1997/// phis for first order recurrences, pointer inductions and reductions. The
1998/// start value is the first operand of the recipe and the incoming value from
1999/// the backedge is the second operand.
2000///
2001/// Inductions are modeled using the following sub-classes:
2002/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2003/// starting at a specified value (zero for the main vector loop, the resume
2004/// value for the epilogue vector loop) and stepping by 1. The induction
2005/// controls exiting of the vector loop by comparing against the vector trip
2006/// count. Produces a single scalar PHI for the induction value per
2007/// iteration.
2008/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2009/// floating point inductions with arbitrary start and step values. Produces
2010/// a vector PHI per-part.
2011/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2012/// value of an IV with different start and step values. Produces a single
2013/// scalar value per iteration
2014/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2015/// canonical or derived induction.
2016/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2017/// pointer induction. Produces either a vector PHI per-part or scalar values
2018/// per-lane based on the canonical induction.
2020 public VPPhiAccessors {
2021protected:
2022 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2023 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2024 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2025 UnderlyingInstr, DL) {}
2026
2027 const VPRecipeBase *getAsRecipe() const override { return this; }
2028
2029public:
2030 ~VPHeaderPHIRecipe() override = default;
2031
2032 /// Method to support type inquiry through isa, cast, and dyn_cast.
2033 static inline bool classof(const VPRecipeBase *R) {
2034 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2035 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2036 }
2037 static inline bool classof(const VPValue *V) {
2038 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2039 }
2040 static inline bool classof(const VPSingleDefRecipe *R) {
2041 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2042 }
2043
2044 /// Generate the phi nodes.
2045 void execute(VPTransformState &State) override = 0;
2046
2047 /// Return the cost of this header phi recipe.
2049 VPCostContext &Ctx) const override;
2050
2051 /// Returns the start value of the phi, if one is set.
2053 return getNumOperands() == 0 ? nullptr : getOperand(0);
2054 }
2056 return getNumOperands() == 0 ? nullptr : getOperand(0);
2057 }
2058
2059 /// Update the start value of the recipe.
2061
2062 /// Returns the incoming value from the loop backedge.
2064 return getOperand(1);
2065 }
2066
2067 /// Update the incoming value from the loop backedge.
2069
2070 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2071 /// to be a recipe.
2073 return *getBackedgeValue()->getDefiningRecipe();
2074 }
2075
2076protected:
2077#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2078 /// Print the recipe.
2079 void printRecipe(raw_ostream &O, const Twine &Indent,
2080 VPSlotTracker &SlotTracker) const override = 0;
2081#endif
2082};
2083
2084/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2085/// VPWidenPointerInductionRecipe), providing shared functionality, including
2086/// retrieving the step value, induction descriptor and original phi node.
2088 const InductionDescriptor &IndDesc;
2089
2090public:
/// Create the part shared by widened induction recipes. \p Kind, \p IV,
/// \p Start and \p DL are forwarded to the VPHeaderPHIRecipe base; \p Step is
/// then appended as an additional operand (it is read back through
/// getOperand(1) by getStepValue()). \p IndDesc is stored as a reference, not
/// copied.
2091 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2092 VPValue *Step, const InductionDescriptor &IndDesc,
2093 DebugLoc DL)
2094 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2095 addOperand(Step);
2096 }
2097
2098 static inline bool classof(const VPRecipeBase *R) {
2099 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2100 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2101 }
2102
2103 static inline bool classof(const VPValue *V) {
2104 auto *R = V->getDefiningRecipe();
2105 return R && classof(R);
2106 }
2107
2108 static inline bool classof(const VPSingleDefRecipe *R) {
2109 return classof(static_cast<const VPRecipeBase *>(R));
2110 }
2111
2112 void execute(VPTransformState &State) override = 0;
2113
2114 /// Returns the start value of the induction.
2116
2117 /// Returns the step value of the induction.
2119 const VPValue *getStepValue() const { return getOperand(1); }
2120
2121 /// Update the step value of the recipe.
2122 void setStepValue(VPValue *V) { setOperand(1, V); }
2123
2125 const VPValue *getVFValue() const { return getOperand(2); }
2126
2127 /// Returns the number of incoming values, also number of incoming blocks.
2128 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2129 /// incoming value, its start value.
2130 unsigned getNumIncoming() const override { return 1; }
2131
2133
2134 /// Returns the induction descriptor for the recipe.
2135 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2136
2138 // TODO: All operands of base recipe must exist and be at same index in
2139 // derived recipe.
2141 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2142 }
2143
2145 // TODO: All operands of base recipe must exist and be at same index in
2146 // derived recipe.
2148 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2149 }
2150
2151 /// Returns true if the recipe only uses the first lane of operand \p Op.
2152 bool usesFirstLaneOnly(const VPValue *Op) const override {
2154 "Op must be an operand of the recipe");
2155 // The recipe creates its own wide start value, so it only requests the
2156 // first lane of the operand.
2157 // TODO: Remove once creating the start value is modeled separately.
2158 return Op == getStartValue() || Op == getStepValue();
2159 }
2160};
2161
2162/// A recipe for handling phi nodes of integer and floating-point inductions,
2163/// producing their vector values. This is an abstract recipe and must be
2164/// converted to concrete recipes before executing.
2166 public VPIRFlags {
2167 TruncInst *Trunc;
2168
2169 // If this recipe is unrolled it will have 2 additional operands.
2170 bool isUnrolled() const { return getNumOperands() == 5; }
2171
2172public:
2174 VPValue *VF, const InductionDescriptor &IndDesc,
2175 const VPIRFlags &Flags, DebugLoc DL)
2176 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2177 Step, IndDesc, DL),
2178 VPIRFlags(Flags), Trunc(nullptr) {
2179 addOperand(VF);
2180 }
2181
2183 VPValue *VF, const InductionDescriptor &IndDesc,
2184 TruncInst *Trunc, const VPIRFlags &Flags,
2185 DebugLoc DL)
2186 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2187 Step, IndDesc, DL),
2188 VPIRFlags(Flags), Trunc(Trunc) {
2189 addOperand(VF);
2191 (void)Metadata;
2192 if (Trunc)
2194 assert(Metadata.empty() && "unexpected metadata on Trunc");
2195 }
2196
2198
2204
2205 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2206
2207 void execute(VPTransformState &State) override {
2208 llvm_unreachable("cannot execute this recipe, should be expanded via "
2209 "expandVPWidenIntOrFpInductionRecipe");
2210 }
2211
2212 /// Returns the start value of the induction.
2214
2216 // If the recipe has been unrolled return the VPValue for the induction
2217 // increment.
2218 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2219 }
2220
2221 /// Returns the number of incoming values, also number of incoming blocks.
2222 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2223 /// incoming value, its start value.
2224 unsigned getNumIncoming() const override { return 1; }
2225
2226 /// Returns the TruncInst this recipe was constructed with, or nullptr if it
2227 /// was constructed without one.
2228 TruncInst *getTruncInst() { return Trunc; }
2229 const TruncInst *getTruncInst() const { return Trunc; }
2230
2231 /// Returns true if the induction is canonical, i.e. starting at 0 and
2232 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2233 /// same type as the canonical induction.
2234 bool isCanonical() const;
2235
2236 /// Returns the scalar type of the induction.
2238 return Trunc ? Trunc->getType() : getStartValue()->getType();
2239 }
2240
2241 /// Returns the VPValue representing the value of this induction at
2242 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2243 /// take place.
2245 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2246 }
2247
2248protected:
2249#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2250 /// Print the recipe.
2251 void printRecipe(raw_ostream &O, const Twine &Indent,
2252 VPSlotTracker &SlotTracker) const override;
2253#endif
2254};
2255
2257public:
2258 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2259 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2260 /// VF*UF.
2262 VPValue *NumUnrolledElems,
2263 const InductionDescriptor &IndDesc, DebugLoc DL)
2264 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2265 Step, IndDesc, DL) {
2266 addOperand(NumUnrolledElems);
2267 }
2268
2270
2276
2277 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2278
2279 /// Generate vector values for the pointer induction.
2280 void execute(VPTransformState &State) override {
2281 llvm_unreachable("cannot execute this recipe, should be expanded via "
2282 "expandVPWidenPointerInduction");
2283 };
2284
2285 /// Returns true if only scalar values will be generated.
2286 bool onlyScalarsGenerated(bool IsScalable);
2287
2288protected:
2289#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2290 /// Print the recipe.
2291 void printRecipe(raw_ostream &O, const Twine &Indent,
2292 VPSlotTracker &SlotTracker) const override;
2293#endif
2294};
2295
2296/// A recipe for widened phis. Incoming values are operands of the recipe and
2297/// their operand index corresponds to the incoming predecessor block. If the
2298/// recipe is placed in an entry block to a (non-replicate) region, it must have
2299/// exactly 2 incoming values, the first from the predecessor of the region and
2300/// the second from the exiting block of the region.
2302 public VPPhiAccessors {
2303 /// Name to use for the generated IR instruction for the widened phi.
2304 std::string Name;
2305
2306public:
2307 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2308 /// debug location \p DL.
2309 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2310 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2311 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2312 if (Start)
2313 addOperand(Start);
2314 }
2315
2318 getOperand(0), getDebugLoc(), Name);
2320 C->addOperand(Op);
2321 return C;
2322 }
2323
2324 ~VPWidenPHIRecipe() override = default;
2325
2326 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2327
2328 /// Generate the phi/select nodes.
2329 void execute(VPTransformState &State) override;
2330
2331protected:
2332#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2333 /// Print the recipe.
2334 void printRecipe(raw_ostream &O, const Twine &Indent,
2335 VPSlotTracker &SlotTracker) const override;
2336#endif
2337
2338 const VPRecipeBase *getAsRecipe() const override { return this; }
2339};
2340
2341/// A recipe for handling first-order recurrence phis. The start value is the
2342/// first operand of the recipe and the incoming value from the backedge is the
2343/// second operand.
2346 VPValue &BackedgeValue)
2347 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {
2348 addOperand(&BackedgeValue);
2349 }
2350
2351 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2352
2357
2358 void execute(VPTransformState &State) override;
2359
2360 /// Return the cost of this first-order recurrence phi recipe.
2362 VPCostContext &Ctx) const override;
2363
2364 /// Returns true if the recipe only uses the first lane of operand \p Op.
2365 bool usesFirstLaneOnly(const VPValue *Op) const override {
2367 "Op must be an operand of the recipe");
2368 return Op == getStartValue();
2369 }
2370
2371protected:
2372#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2373 /// Print the recipe.
2374 void printRecipe(raw_ostream &O, const Twine &Indent,
2375 VPSlotTracker &SlotTracker) const override;
2376#endif
2377};
2378
2379/// Possible variants of a reduction.
2380
2381/// This reduction is ordered and in-loop.
2382struct RdxOrdered {};
2383/// This reduction is in-loop.
2384struct RdxInLoop {};
2385/// This reduction is unordered with the partial result scaled down by some
2386/// factor.
2389};
2390using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2391
2392inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2393 unsigned ScaleFactor) {
2394 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2395 if (Ordered)
2396 return RdxOrdered{};
2397 if (InLoop)
2398 return RdxInLoop{};
2399 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2400}
2401
2402/// A recipe for handling reduction phis. The start value is the first operand
2403/// of the recipe and the incoming value from the backedge is the second
2404/// operand.
2406 public VPUnrollPartAccessor<2> {
2407 /// The recurrence kind of the reduction.
2408 const RecurKind Kind;
2409
2410 ReductionStyle Style;
2411
2412 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2413 /// patterns for argmin/argmax).
2414 /// TODO: Also support cases where the phi itself has a single use, but its
2415 /// compare has multiple uses.
2416 bool HasUsesOutsideReductionChain;
2417
2418public:
2419 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2421 VPValue &BackedgeValue, ReductionStyle Style,
2422 bool HasUsesOutsideReductionChain = false)
2423 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2424 Style(Style),
2425 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2426 addOperand(&BackedgeValue);
2427 }
2428
2429 ~VPReductionPHIRecipe() override = default;
2430
2432 return new VPReductionPHIRecipe(
2434 *getOperand(0), *getBackedgeValue(), Style,
2435 HasUsesOutsideReductionChain);
2436 }
2437
2438 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2439
2440 /// Generate the phi/select nodes.
2441 void execute(VPTransformState &State) override;
2442
2443 /// Get the factor that the VF of this recipe's output should be scaled by, or
2444 /// 1 if it isn't scaled.
2445 unsigned getVFScaleFactor() const {
2446 auto *Partial = std::get_if<RdxUnordered>(&Style);
2447 return Partial ? Partial->VFScaleFactor : 1;
2448 }
2449
2450 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2451 /// > 1.
2452 void setVFScaleFactor(unsigned ScaleFactor) {
2453 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2454 Style = RdxUnordered{ScaleFactor};
2455 }
2456
2457 /// Returns the number of incoming values, also number of incoming blocks.
2458 /// A reduction phi reports two incoming values: the constructor adds both
2459 /// the start value and the backedge value as operands.
2460 unsigned getNumIncoming() const override { return 2; }
2461
2462 /// Returns the recurrence kind of the reduction.
2463 RecurKind getRecurrenceKind() const { return Kind; }
2464
2465 /// Returns true, if the phi is part of an ordered reduction.
2466 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2467
2468 /// Returns true if the phi is part of an in-loop reduction.
2469 bool isInLoop() const {
2470 return std::holds_alternative<RdxInLoop>(Style) ||
2471 std::holds_alternative<RdxOrdered>(Style);
2472 }
2473
2474 /// Returns true if the reduction outputs a vector with a scaled down VF.
2475 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2476
2477 /// Returns true, if the phi is part of a multi-use reduction.
2479 return HasUsesOutsideReductionChain;
2480 }
2481
2482 /// Returns true if the recipe only uses the first lane of operand \p Op.
2483 bool usesFirstLaneOnly(const VPValue *Op) const override {
2485 "Op must be an operand of the recipe");
2486 return isOrdered() || isInLoop();
2487 }
2488
2489protected:
2490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2491 /// Print the recipe.
2492 void printRecipe(raw_ostream &O, const Twine &Indent,
2493 VPSlotTracker &SlotTracker) const override;
2494#endif
2495};
2496
2497/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2498/// instructions.
2500public:
2501 /// The blend operation is a User of the incoming values and of their
2502 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2503 /// be omitted (implied by passing an odd number of operands) in which case
2504 /// all other incoming values are merged into it.
2506 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2507 assert(Operands.size() >= 2 && "Expected at least two operands!");
2508 }
2509
2514
2515 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2516
2517 /// A normalized blend is one that has an odd number of operands, whereby the
2518 /// first operand does not have an associated mask.
2519 bool isNormalized() const { return getNumOperands() % 2; }
2520
2521 /// Return the number of incoming values, taking into account when normalized
2522 /// the first incoming value will have no mask.
2523 unsigned getNumIncomingValues() const {
2524 return (getNumOperands() + isNormalized()) / 2;
2525 }
2526
2527 /// Return incoming value number \p Idx.
2528 VPValue *getIncomingValue(unsigned Idx) const {
2529 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2530 }
2531
2532 /// Return mask number \p Idx.
2533 VPValue *getMask(unsigned Idx) const {
2534 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2535 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2536 }
2537
2538 /// Set mask number \p Idx to \p V.
2539 void setMask(unsigned Idx, VPValue *V) {
2540 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2541 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2542 }
2543
2544 void execute(VPTransformState &State) override {
2545 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2546 }
2547
2548 /// Return the cost of this VPBlendRecipe.
2549 InstructionCost computeCost(ElementCount VF,
2550 VPCostContext &Ctx) const override;
2551
2552 /// Returns true if the recipe only uses the first lane of operand \p Op.
2553 bool usesFirstLaneOnly(const VPValue *Op) const override {
2555 "Op must be an operand of the recipe");
2556 // Recursing through Blend recipes only, must terminate at header phi's the
2557 // latest.
2558 return all_of(users(),
2559 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2560 }
2561
2562protected:
2563#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2564 /// Print the recipe.
2565 void printRecipe(raw_ostream &O, const Twine &Indent,
2566 VPSlotTracker &SlotTracker) const override;
2567#endif
2568};
2569
2570/// A common base class for interleaved memory operations.
2571/// An Interleaved memory operation is a memory access method that combines
2572/// multiple strided loads/stores into a single wide load/store with shuffles.
2573/// The first operand is the start address. The optional operands are, in order,
2574/// the stored values and the mask.
2576 public VPIRMetadata {
2578
2579 /// Indicates if the interleave group is in a conditional block and requires a
2580 /// mask.
2581 bool HasMask = false;
2582
2583 /// Indicates if gaps between members of the group need to be masked out or if
2584 /// unused gaps can be loaded speculatively.
2585 bool NeedsMaskForGaps = false;
2586
2587protected:
2588 VPInterleaveBase(const unsigned char SC,
2590 ArrayRef<VPValue *> Operands,
2591 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2592 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2593 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2594 NeedsMaskForGaps(NeedsMaskForGaps) {
2595 // TODO: extend the masked interleaved-group support to reversed access.
2596 assert((!Mask || !IG->isReverse()) &&
2597 "Reversed masked interleave-group not supported.");
2598 for (unsigned I = 0; I < IG->getFactor(); ++I)
2599 if (Instruction *Inst = IG->getMember(I)) {
2600 if (Inst->getType()->isVoidTy())
2601 continue;
2602 new VPRecipeValue(this, Inst);
2603 }
2604
2605 for (auto *SV : StoredValues)
2606 addOperand(SV);
2607 if (Mask) {
2608 HasMask = true;
2609 addOperand(Mask);
2610 }
2611 }
2612
2613public:
2614 VPInterleaveBase *clone() override = 0;
2615
2616 static inline bool classof(const VPRecipeBase *R) {
2617 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2618 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2619 }
2620
2621 static inline bool classof(const VPUser *U) {
2622 auto *R = dyn_cast<VPRecipeBase>(U);
2623 return R && classof(R);
2624 }
2625
2626 /// Return the address accessed by this recipe.
2627 VPValue *getAddr() const {
2628 return getOperand(0); // Address is the 1st, mandatory operand.
2629 }
2630
2631 /// Return the mask used by this recipe. Note that a full mask is represented
2632 /// by a nullptr.
2633 VPValue *getMask() const {
2634 // Mask is optional and the last operand.
2635 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2636 }
2637
2638 /// Return true if the access needs a mask because of the gaps.
2639 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2640
2642
2643 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2644
2645 void execute(VPTransformState &State) override {
2646 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2647 }
2648
2649 /// Return the cost of this recipe.
2650 InstructionCost computeCost(ElementCount VF,
2651 VPCostContext &Ctx) const override;
2652
2653 /// Returns true if the recipe only uses the first lane of operand \p Op.
2654 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2655
2656 /// Returns the number of stored operands of this interleave group. Returns 0
2657 /// for load interleave groups.
2658 virtual unsigned getNumStoreOperands() const = 0;
2659
2660 /// Return the VPValues stored by this interleave group. If it is a load
2661 /// interleave group, return an empty ArrayRef.
2663 return ArrayRef<VPValue *>(op_end() -
2664 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2666 }
2667};
2668
2669/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2670/// or stores into one wide load/store and shuffles. The first operand of a
2671/// VPInterleave recipe is the address, followed by the stored values, followed
2672/// by an optional mask.
2674public:
2676 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2677 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2678 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2679 NeedsMaskForGaps, MD, DL) {}
2680
2681 ~VPInterleaveRecipe() override = default;
2682
2686 needsMaskForGaps(), *this, getDebugLoc());
2687 }
2688
2689 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2690
2691 /// Generate the wide load or store, and shuffles.
2692 void execute(VPTransformState &State) override;
2693
2694 bool usesFirstLaneOnly(const VPValue *Op) const override {
2696 "Op must be an operand of the recipe");
2697 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2698 }
2699
2700 unsigned getNumStoreOperands() const override {
2701 return getNumOperands() - (getMask() ? 2 : 1);
2702 }
2703
2704protected:
2705#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2706 /// Print the recipe.
2707 void printRecipe(raw_ostream &O, const Twine &Indent,
2708 VPSlotTracker &SlotTracker) const override;
2709#endif
2710};
2711
2712/// A recipe for interleaved memory operations with vector-predication
2713/// intrinsics. The first operand is the address, the second operand is the
2714/// explicit vector length. Stored values and mask are optional operands.
2716public:
2718 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2719 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2720 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2721 R.getDebugLoc()) {
2722 assert(!getInterleaveGroup()->isReverse() &&
2723 "Reversed interleave-group with tail folding is not supported.");
2724 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2725 "supported for scalable vector.");
2726 }
2727
2728 ~VPInterleaveEVLRecipe() override = default;
2729
2731 llvm_unreachable("cloning not implemented yet");
2732 }
2733
2734 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2735
2736 /// The VPValue of the explicit vector length.
2737 VPValue *getEVL() const { return getOperand(1); }
2738
2739 /// Generate the wide load or store, and shuffles.
2740 void execute(VPTransformState &State) override;
2741
2742 /// The recipe only uses the first lane of the address, and EVL operand.
2743 bool usesFirstLaneOnly(const VPValue *Op) const override {
2745 "Op must be an operand of the recipe");
2746 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2747 Op == getEVL();
2748 }
2749
2750 unsigned getNumStoreOperands() const override {
2751 return getNumOperands() - (getMask() ? 3 : 2);
2752 }
2753
2754protected:
2755#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2756 /// Print the recipe.
2757 void printRecipe(raw_ostream &O, const Twine &Indent,
2758 VPSlotTracker &SlotTracker) const override;
2759#endif
2760};
2761
2762/// A recipe to represent inloop, ordered or partial reduction operations. It
2763/// performs a reduction on a vector operand into a scalar (vector in the case
2764/// of a partial reduction) value, and adds the result to a chain. The Operands
2765/// are {ChainOp, VecOp, [Condition]}.
2767
2768 /// The recurrence kind for the reduction in question.
2769 RecurKind RdxKind;
2770 /// Whether the reduction is conditional.
2771 bool IsConditional = false;
2772 ReductionStyle Style;
2773
2774protected:
2775 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2777 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2778 ReductionStyle Style, DebugLoc DL)
2779 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2780 Style(Style) {
2781 if (CondOp) {
2782 IsConditional = true;
2783 addOperand(CondOp);
2784 }
2786 }
2787
2788public:
2790 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2792 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2793 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2794 DL) {}
2795
2797 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2799 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2800 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2801 DL) {}
2802
2803 ~VPReductionRecipe() override = default;
2804
2806 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2808 getCondOp(), Style, getDebugLoc());
2809 }
2810
2811 static inline bool classof(const VPRecipeBase *R) {
2812 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2813 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2814 }
2815
2816 static inline bool classof(const VPUser *U) {
2817 auto *R = dyn_cast<VPRecipeBase>(U);
2818 return R && classof(R);
2819 }
2820
2821 static inline bool classof(const VPValue *VPV) {
2822 const VPRecipeBase *R = VPV->getDefiningRecipe();
2823 return R && classof(R);
2824 }
2825
2826 static inline bool classof(const VPSingleDefRecipe *R) {
2827 return classof(static_cast<const VPRecipeBase *>(R));
2828 }
2829
2830 /// Generate the reduction in the loop.
2831 void execute(VPTransformState &State) override;
2832
2833 /// Return the cost of VPReductionRecipe.
2834 InstructionCost computeCost(ElementCount VF,
2835 VPCostContext &Ctx) const override;
2836
2837 /// Return the recurrence kind for the in-loop reduction.
2838 RecurKind getRecurrenceKind() const { return RdxKind; }
2839 /// Return true if the in-loop reduction is ordered.
2840 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2841 /// Return true if the in-loop reduction is conditional.
2842 bool isConditional() const { return IsConditional; };
2843 /// Returns true if the reduction outputs a vector with a scaled down VF.
2844 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2845 /// Returns true if the reduction is in-loop.
2846 bool isInLoop() const {
2847 return std::holds_alternative<RdxInLoop>(Style) ||
2848 std::holds_alternative<RdxOrdered>(Style);
2849 }
2850 /// The VPValue of the scalar Chain being accumulated.
2851 VPValue *getChainOp() const { return getOperand(0); }
2852 /// The VPValue of the vector value to be reduced.
2853 VPValue *getVecOp() const { return getOperand(1); }
2854 /// The VPValue of the condition for the block.
2856 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2857 }
2858 /// Get the factor that the VF of this recipe's output should be scaled by, or
2859 /// 1 if it isn't scaled.
2860 unsigned getVFScaleFactor() const {
2861 auto *Partial = std::get_if<RdxUnordered>(&Style);
2862 return Partial ? Partial->VFScaleFactor : 1;
2863 }
2864
2865protected:
2866#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2867 /// Print the recipe.
2868 void printRecipe(raw_ostream &O, const Twine &Indent,
2869 VPSlotTracker &SlotTracker) const override;
2870#endif
2871};
2872
2873/// A recipe to represent inloop reduction operations with vector-predication
2874/// intrinsics, performing a reduction on a vector operand with the explicit
2875/// vector length (EVL) into a scalar value, and adding the result to a chain.
2876/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2878public:
2882 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2883 R.getFastMathFlags(),
2885 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2886 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2887
2888 ~VPReductionEVLRecipe() override = default;
2889
2891 llvm_unreachable("cloning not implemented yet");
2892 }
2893
2894 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2895
2896 /// Generate the reduction in the loop.
2897 void execute(VPTransformState &State) override;
2898
2899 /// The VPValue of the explicit vector length.
2900 VPValue *getEVL() const { return getOperand(2); }
2901
2902 /// Returns true if the recipe only uses the first lane of operand \p Op.
2903 bool usesFirstLaneOnly(const VPValue *Op) const override {
2905 "Op must be an operand of the recipe");
2906 return Op == getEVL();
2907 }
2908
2909protected:
2910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2911 /// Print the recipe.
2912 void printRecipe(raw_ostream &O, const Twine &Indent,
2913 VPSlotTracker &SlotTracker) const override;
2914#endif
2915};
2916
2917/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2918/// copies of the original scalar type, one per lane, instead of producing a
2919/// single copy of widened type for all lanes. If the instruction is known to be
2920/// a single scalar, only one copy, per lane zero, will be generated.
2922 public VPIRMetadata {
2923 /// Indicator if only a single replica per lane is needed.
2924 bool IsSingleScalar;
2925
2926 /// Indicator if the replicas are also predicated.
2927 bool IsPredicated;
2928
2929public:
2931 bool IsSingleScalar, VPValue *Mask = nullptr,
2932 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2933 DebugLoc DL = DebugLoc::getUnknown())
2934 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2935 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2936 IsPredicated(Mask) {
2937 setUnderlyingValue(I);
2938 if (Mask)
2939 addOperand(Mask);
2940 }
2941
2942 ~VPReplicateRecipe() override = default;
2943
2945 auto *Copy = new VPReplicateRecipe(
2946 getUnderlyingInstr(), operands(), IsSingleScalar,
2947 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2948 Copy->transferFlags(*this);
2949 return Copy;
2950 }
2951
2952 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2953
2954 /// Generate replicas of the desired Ingredient. Replicas will be generated
2955 /// for all parts and lanes unless a specific part and lane are specified in
2956 /// the \p State.
2957 void execute(VPTransformState &State) override;
2958
2959 /// Return the cost of this VPReplicateRecipe.
2960 InstructionCost computeCost(ElementCount VF,
2961 VPCostContext &Ctx) const override;
2962
2963 bool isSingleScalar() const { return IsSingleScalar; }
2964
2965 bool isPredicated() const { return IsPredicated; }
2966
2967 /// Returns true if the recipe only uses the first lane of operand \p Op.
2968 bool usesFirstLaneOnly(const VPValue *Op) const override {
2970 "Op must be an operand of the recipe");
2971 return isSingleScalar();
2972 }
2973
2974 /// Returns true if the recipe uses scalars of operand \p Op.
2975 bool usesScalars(const VPValue *Op) const override {
2977 "Op must be an operand of the recipe");
2978 return true;
2979 }
2980
2981 /// Returns true if the recipe is used by a widened recipe via an intervening
2982 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2983 /// in a vector.
2984 bool shouldPack() const;
2985
2986 /// Return the mask of a predicated VPReplicateRecipe.
2988 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2989 return getOperand(getNumOperands() - 1);
2990 }
2991
2992 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2993
2994protected:
2995#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2996 /// Print the recipe.
2997 void printRecipe(raw_ostream &O, const Twine &Indent,
2998 VPSlotTracker &SlotTracker) const override;
2999#endif
3000};
3001
3002/// A recipe for generating conditional branches on the bits of a mask.
3004public:
3006 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3007
3010 }
3011
3012 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3013
3014 /// Generate the extraction of the appropriate bit from the block mask and the
3015 /// conditional branch.
3016 void execute(VPTransformState &State) override;
3017
3018 /// Return the cost of this VPBranchOnMaskRecipe.
3019 InstructionCost computeCost(ElementCount VF,
3020 VPCostContext &Ctx) const override;
3021
3022#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3023 /// Print the recipe.
3024 void printRecipe(raw_ostream &O, const Twine &Indent,
3025 VPSlotTracker &SlotTracker) const override {
3026 O << Indent << "BRANCH-ON-MASK ";
3028 }
3029#endif
3030
3031 /// Returns true if the recipe uses scalars of operand \p Op.
3032 bool usesScalars(const VPValue *Op) const override {
3034 "Op must be an operand of the recipe");
3035 return true;
3036 }
3037};
3038
3039/// A recipe to combine multiple recipes into a single 'expression' recipe,
3040/// which should be considered a single entity for cost-modeling and transforms.
3041/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3042/// expression recipes, before execute. The individual expression recipes are
3043/// completely disconnected from the def-use graph of other recipes not part of
3044/// the expression. Def-use edges between pairs of expression recipes remain
3045/// intact, whereas every edge between an expression recipe and a recipe outside
3046/// the expression is elevated to connect the non-expression recipe with the
3047/// VPExpressionRecipe itself.
3048class VPExpressionRecipe : public VPSingleDefRecipe {
3049 /// Recipes included in this VPExpressionRecipe. This could contain
3050 /// duplicates.
3051 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3052
3053 /// Temporary VPValues used for external operands of the expression, i.e.
3054 /// operands not defined by recipes in the expression.
3055 SmallVector<VPValue *> LiveInPlaceholders;
3056
3057 enum class ExpressionTypes {
3058 /// Represents an inloop extended reduction operation, performing a
3059 /// reduction on an extended vector operand into a scalar value, and adding
3060 /// the result to a chain.
3061 ExtendedReduction,
3062 /// Represent an inloop multiply-accumulate reduction, multiplying the
3063 /// extended vector operands, performing a reduction.add on the result, and
3064 /// adding the scalar result to a chain.
3065 ExtMulAccReduction,
3066 /// Represent an inloop multiply-accumulate reduction, multiplying the
3067 /// vector operands, performing a reduction.add on the result, and adding
3068 /// the scalar result to a chain.
3069 MulAccReduction,
3070 /// Represent an inloop multiply-accumulate reduction, multiplying the
3071 /// extended vector operands, negating the multiplication, performing a
3072 /// reduction.add on the result, and adding the scalar result to a chain.
3073 ExtNegatedMulAccReduction,
3074 };
3075
3076 /// Type of the expression.
3077 ExpressionTypes ExpressionType;
3078
3079 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3080 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3081 /// in the expression) are replaced by temporary VPValues and the original
3082 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3083 /// as needed (excluding last) to ensure they are only used by other recipes
3084 /// in the expression.
3085 VPExpressionRecipe(ExpressionTypes ExpressionType,
3086 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3087
3088public:
3090 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3092 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3095 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3096 {Ext0, Ext1, Mul, Red}) {}
3099 VPReductionRecipe *Red)
3100 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3101 {Ext0, Ext1, Mul, Sub, Red}) {
3102 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3103 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3104 "Expected an add reduction");
3105 assert(getNumOperands() >= 3 && "Expected at least three operands");
3106 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3107 assert(SubConst && SubConst->getValue() == 0 &&
3108 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3109 }
3110
3112 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3113 for (auto *R : reverse(ExpressionRecipes)) {
3114 if (ExpressionRecipesSeen.insert(R).second)
3115 delete R;
3116 }
3117 for (VPValue *T : LiveInPlaceholders)
3118 delete T;
3119 }
3120
3121 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3122
3123 VPExpressionRecipe *clone() override {
3124 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3125 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3126 for (auto *R : ExpressionRecipes)
3127 NewExpressiondRecipes.push_back(R->clone());
3128 for (auto *New : NewExpressiondRecipes) {
3129 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3130 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3131 // Update placeholder operands in the cloned recipe to use the external
3132 // operands, to be internalized when the cloned expression is constructed.
3133 for (const auto &[Placeholder, OutsideOp] :
3134 zip(LiveInPlaceholders, operands()))
3135 New->replaceUsesOfWith(Placeholder, OutsideOp);
3136 }
3137 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3138 }
3139
3140 /// Return the VPValue to use to infer the result type of the recipe.
3142 unsigned OpIdx =
3143 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3144 : 1;
3145 return getOperand(getNumOperands() - OpIdx);
3146 }
3147
3148 /// Insert the recipes of the expression back into the VPlan, directly before
3149 /// the current recipe. Leaves the expression recipe empty, which must be
3150 /// removed before codegen.
3151 void decompose();
3152
3153 unsigned getVFScaleFactor() const {
3154 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3155 return PR ? PR->getVFScaleFactor() : 1;
3156 }
3157
3158 /// Method for generating code, must not be called as this recipe is abstract.
3159 void execute(VPTransformState &State) override {
3160 llvm_unreachable("recipe must be removed before execute");
3161 }
3162
3164 VPCostContext &Ctx) const override;
3165
3166 /// Returns true if this expression contains recipes that may read from or
3167 /// write to memory.
3168 bool mayReadOrWriteMemory() const;
3169
3170 /// Returns true if this expression contains recipes that may have side
3171 /// effects.
3172 bool mayHaveSideEffects() const;
3173
3174 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3175 bool isSingleScalar() const;
3176
3177protected:
3178#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3179 /// Print the recipe.
3180 void printRecipe(raw_ostream &O, const Twine &Indent,
3181 VPSlotTracker &SlotTracker) const override;
3182#endif
3183};
3184
3185/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3186/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3187/// order to merge values that are set under such a branch and feed their uses.
3188/// The phi nodes can be scalar or vector depending on the users of the value.
3189/// This recipe works in concert with VPBranchOnMaskRecipe.
3191public:
3192 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3193 /// nodes after merging back from a Branch-on-Mask.
3195 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3196 ~VPPredInstPHIRecipe() override = default;
3197
3199 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3200 }
3201
3202 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3203
3204 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3205 /// retain SSA form.
3206 void execute(VPTransformState &State) override;
3207
3208 /// Return the cost of this VPPredInstPHIRecipe.
3210 VPCostContext &Ctx) const override {
3211 // TODO: Compute accurate cost after retiring the legacy cost model.
3212 return 0;
3213 }
3214
3215 /// Returns true if the recipe uses scalars of operand \p Op.
3216 bool usesScalars(const VPValue *Op) const override {
3218 "Op must be an operand of the recipe");
3219 return true;
3220 }
3221
3222protected:
3223#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3224 /// Print the recipe.
3225 void printRecipe(raw_ostream &O, const Twine &Indent,
3226 VPSlotTracker &SlotTracker) const override;
3227#endif
3228};
3229
3230/// A common base class for widening memory operations. An optional mask can be
3231/// provided as the last operand.
3233 public VPIRMetadata {
3234protected:
3236
3237 /// Alignment information for this memory access.
3239
3240 /// Whether the accessed addresses are consecutive.
3242
3243 /// Whether the consecutive accessed addresses are in reverse order.
3245
3246 /// Whether the memory access is masked.
3247 bool IsMasked = false;
3248
3249 void setMask(VPValue *Mask) {
3250 assert(!IsMasked && "cannot re-set mask");
3251 if (!Mask)
3252 return;
3253 addOperand(Mask);
3254 IsMasked = true;
3255 }
3256
3257 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3258 std::initializer_list<VPValue *> Operands,
3259 bool Consecutive, bool Reverse,
3260 const VPIRMetadata &Metadata, DebugLoc DL)
3261 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3263 Reverse(Reverse) {
3264 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3266 "Reversed acccess without VPVectorEndPointerRecipe address?");
3267 }
3268
3269public:
3271 llvm_unreachable("cloning not supported");
3272 }
3273
3274 static inline bool classof(const VPRecipeBase *R) {
3275 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3276 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3277 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3278 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3279 }
3280
3281 static inline bool classof(const VPUser *U) {
3282 auto *R = dyn_cast<VPRecipeBase>(U);
3283 return R && classof(R);
3284 }
3285
3286 /// Return whether the loaded-from / stored-to addresses are consecutive.
3287 bool isConsecutive() const { return Consecutive; }
3288
3289 /// Return whether the consecutive loaded/stored addresses are in reverse
3290 /// order.
3291 bool isReverse() const { return Reverse; }
3292
3293 /// Return the address accessed by this recipe.
3294 VPValue *getAddr() const { return getOperand(0); }
3295
3296 /// Returns true if the recipe is masked.
3297 bool isMasked() const { return IsMasked; }
3298
3299 /// Return the mask used by this recipe. Note that a full mask is represented
3300 /// by a nullptr.
3301 VPValue *getMask() const {
3302 // Mask is optional and therefore the last operand.
3303 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3304 }
3305
3306 /// Returns the alignment of the memory access.
3307 Align getAlign() const { return Alignment; }
3308
3309 /// Generate the wide load/store.
3310 void execute(VPTransformState &State) override {
3311 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3312 }
3313
3314 /// Return the cost of this VPWidenMemoryRecipe.
3315 InstructionCost computeCost(ElementCount VF,
3316 VPCostContext &Ctx) const override;
3317
3319};
3320
3321/// A recipe for widening load operations, using the address to load from and an
3322/// optional mask.
3324 public VPRecipeValue {
3326 bool Consecutive, bool Reverse,
3327 const VPIRMetadata &Metadata, DebugLoc DL)
3328 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3329 Reverse, Metadata, DL),
3330 VPRecipeValue(this, &Load) {
3331 setMask(Mask);
3332 }
3333
3336 getMask(), Consecutive, Reverse, *this,
3337 getDebugLoc());
3338 }
3339
3340 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3341
3342 /// Generate a wide load or gather.
3343 void execute(VPTransformState &State) override;
3344
3345 /// Returns true if the recipe only uses the first lane of operand \p Op.
3346 bool usesFirstLaneOnly(const VPValue *Op) const override {
3348 "Op must be an operand of the recipe");
3349 // Widened, consecutive load operations only demand the first lane of
3350 // their address.
3351 return Op == getAddr() && isConsecutive();
3352 }
3353
3354protected:
3355#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3356 /// Print the recipe.
3357 void printRecipe(raw_ostream &O, const Twine &Indent,
3358 VPSlotTracker &SlotTracker) const override;
3359#endif
3360};
3361
3362/// A recipe for widening load operations with vector-predication intrinsics,
3363/// using the address to load from, the explicit vector length and an optional
3364/// mask.
3366 public VPRecipeValue {
3368 VPValue *Mask)
3369 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3370 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3371 L.getDebugLoc()),
3372 VPRecipeValue(this, &getIngredient()) {
3373 setMask(Mask);
3374 }
3375
3376 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3377
3378 /// Return the EVL operand.
3379 VPValue *getEVL() const { return getOperand(1); }
3380
3381 /// Generate the wide load or gather.
3382 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3383
3384 /// Return the cost of this VPWidenLoadEVLRecipe.
3386 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3387
3388 /// Returns true if the recipe only uses the first lane of operand \p Op.
3389 bool usesFirstLaneOnly(const VPValue *Op) const override {
3391 "Op must be an operand of the recipe");
3392 // Widened loads only demand the first lane of EVL and consecutive loads
3393 // only demand the first lane of their address.
3394 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3395 }
3396
3397protected:
3398#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3399 /// Print the recipe.
3400 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3401 VPSlotTracker &SlotTracker) const override;
3402#endif
3403};
3404
3405/// A recipe for widening store operations, using the stored value, the address
3406/// to store to and an optional mask.
3408 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3409 VPValue *Mask, bool Consecutive, bool Reverse,
3410 const VPIRMetadata &Metadata, DebugLoc DL)
3411 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3412 Consecutive, Reverse, Metadata, DL) {
3413 setMask(Mask);
3414 }
3415
3421
3422 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3423
3424 /// Return the value stored by this recipe.
3425 VPValue *getStoredValue() const { return getOperand(1); }
3426
3427 /// Generate a wide store or scatter.
3428 void execute(VPTransformState &State) override;
3429
3430 /// Returns true if the recipe only uses the first lane of operand \p Op.
3431 bool usesFirstLaneOnly(const VPValue *Op) const override {
3433 "Op must be an operand of the recipe");
3434 // Widened, consecutive stores only demand the first lane of their address,
3435 // unless the same operand is also stored.
3436 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3437 }
3438
3439protected:
3440#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3441 /// Print the recipe.
3442 void printRecipe(raw_ostream &O, const Twine &Indent,
3443 VPSlotTracker &SlotTracker) const override;
3444#endif
3445};
3446
3447/// A recipe for widening store operations with vector-predication intrinsics,
3448/// using the value to store, the address to store to, the explicit vector
3449/// length and an optional mask.
3452 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3453 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3454 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3455 S.isReverse(), S, S.getDebugLoc()) {
3456 setMask(Mask);
3457 }
3458
3459 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3460
3461 /// Return the value stored by this recipe.
3462 VPValue *getStoredValue() const { return getOperand(1); }
3463
3464 /// Return the EVL operand.
3465 VPValue *getEVL() const { return getOperand(2); }
3466
3467 /// Generate the wide store or scatter.
3468 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3469
3470 /// Return the cost of this VPWidenStoreEVLRecipe.
3472 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3473
3474 /// Returns true if the recipe only uses the first lane of operand \p Op.
3475 bool usesFirstLaneOnly(const VPValue *Op) const override {
3477 "Op must be an operand of the recipe");
3478 if (Op == getEVL()) {
3479 assert(getStoredValue() != Op && "unexpected store of EVL");
3480 return true;
3481 }
3482 // Widened, consecutive memory operations only demand the first lane of
3483 // their address, unless the same operand is also stored. The latter can
3484 // happen with opaque pointers.
3485 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3486 }
3487
3488protected:
3489#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3490 /// Print the recipe.
3491 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3492 VPSlotTracker &SlotTracker) const override;
3493#endif
3494};
3495
3496/// Recipe to expand a SCEV expression.
3498 const SCEV *Expr;
3499
3500public:
3502 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3503
3504 ~VPExpandSCEVRecipe() override = default;
3505
3506 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3507
3508 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3509
3510 void execute(VPTransformState &State) override {
3511 llvm_unreachable("SCEV expressions must be expanded before final execute");
3512 }
3513
3514 /// Return the cost of this VPExpandSCEVRecipe.
3516 VPCostContext &Ctx) const override {
3517 // TODO: Compute accurate cost after retiring the legacy cost model.
3518 return 0;
3519 }
3520
3521 const SCEV *getSCEV() const { return Expr; }
3522
3523protected:
3524#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3525 /// Print the recipe.
3526 void printRecipe(raw_ostream &O, const Twine &Indent,
3527 VPSlotTracker &SlotTracker) const override;
3528#endif
3529};
3530
3531/// Canonical scalar induction phi of the vector loop. Starting at the specified
3532/// start value (either 0 or the resume value when vectorizing the epilogue
3533/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3534/// canonical induction variable.
3536public:
3538 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3539
3540 ~VPCanonicalIVPHIRecipe() override = default;
3541
3544 R->addOperand(getBackedgeValue());
3545 return R;
3546 }
3547
3548 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3549
3550 void execute(VPTransformState &State) override {
3551 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3552 "scalar phi recipe");
3553 }
3554
3555 /// Returns the start value of the canonical induction.
3557
3558 /// Returns the scalar type of the induction.
3559 Type *getScalarType() const { return getStartValue()->getType(); }
3560
3561 /// Returns true if the recipe only uses the first lane of operand \p Op.
3562 bool usesFirstLaneOnly(const VPValue *Op) const override {
3564 "Op must be an operand of the recipe");
3565 return true;
3566 }
3567
3568 /// Returns true if the recipe only uses the first part of operand \p Op.
3569 bool usesFirstPartOnly(const VPValue *Op) const override {
3571 "Op must be an operand of the recipe");
3572 return true;
3573 }
3574
3575 /// Return the cost of this VPCanonicalIVPHIRecipe.
3577 VPCostContext &Ctx) const override {
3578 // For now, match the behavior of the legacy cost model.
3579 return 0;
3580 }
3581
3582protected:
3583#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3584 /// Print the recipe.
3585 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3586 VPSlotTracker &SlotTracker) const override;
3587#endif
3588};
3589
3590/// A recipe for generating the active lane mask for the vector loop that is
3591/// used to predicate the vector operations.
3593public:
3595 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3596 DL) {}
3597
3598 ~VPActiveLaneMaskPHIRecipe() override = default;
3599
3602 if (getNumOperands() == 2)
3603 R->addOperand(getOperand(1));
3604 return R;
3605 }
3606
3607 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3608
3609 /// Generate the active lane mask phi of the vector loop.
3610 void execute(VPTransformState &State) override;
3611
3612protected:
3613#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3614 /// Print the recipe.
3615 void printRecipe(raw_ostream &O, const Twine &Indent,
3616 VPSlotTracker &SlotTracker) const override;
3617#endif
3618};
3619
3620/// A recipe for generating the phi node for the current index of elements,
3621/// adjusted in accordance with EVL value. It starts at the start value of the
3622/// canonical induction and gets incremented by EVL in each iteration of the
3623/// vector loop.
3625public:
3627 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3628
3629 ~VPEVLBasedIVPHIRecipe() override = default;
3630
3632 llvm_unreachable("cloning not implemented yet");
3633 }
3634
3635 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3636
3637 void execute(VPTransformState &State) override {
3638 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3639 "scalar phi recipe");
3640 }
3641
3642 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3644 VPCostContext &Ctx) const override {
3645 // For now, match the behavior of the legacy cost model.
3646 return 0;
3647 }
3648
3649 /// Returns true if the recipe only uses the first lane of operand \p Op.
3650 bool usesFirstLaneOnly(const VPValue *Op) const override {
3652 "Op must be an operand of the recipe");
3653 return true;
3654 }
3655
3656protected:
3657#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3658 /// Print the recipe.
3659 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3660 VPSlotTracker &SlotTracker) const override;
3661#endif
3662};
3663
3664/// A Recipe for widening the canonical induction variable of the vector loop.
3666 public VPUnrollPartAccessor<1> {
3667public:
3669 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3670
3671 ~VPWidenCanonicalIVRecipe() override = default;
3672
3677
3678 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3679
3680 /// Generate a canonical vector induction variable of the vector loop, with
3681 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3682 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3683 void execute(VPTransformState &State) override;
3684
3685 /// Return the cost of this VPWidenCanonicalIVRecipe.
3687 VPCostContext &Ctx) const override {
3688 // TODO: Compute accurate cost after retiring the legacy cost model.
3689 return 0;
3690 }
3691
3692protected:
3693#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3694 /// Print the recipe.
3695 void printRecipe(raw_ostream &O, const Twine &Indent,
3696 VPSlotTracker &SlotTracker) const override;
3697#endif
3698};
3699
3700/// A recipe for converting the input value \p IV value to the corresponding
3701/// value of an IV with different start and step values, using Start + IV *
3702/// Step.
3704 /// Kind of the induction.
3706 /// If not nullptr, the floating point induction binary operator. Must be set
3707 /// for floating point inductions.
3708 const FPMathOperator *FPBinOp;
3709
3710 /// Name to use for the generated IR instruction for the derived IV.
3711 std::string Name;
3712
3713public:
3715 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3716 const Twine &Name = "")
3718 IndDesc.getKind(),
3719 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3720 Start, CanonicalIV, Step, Name) {}
3721
3723 const FPMathOperator *FPBinOp, VPIRValue *Start,
3724 VPValue *IV, VPValue *Step, const Twine &Name = "")
3725 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3726 FPBinOp(FPBinOp), Name(Name.str()) {}
3727
3728 ~VPDerivedIVRecipe() override = default;
3729
3731 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3732 getStepValue());
3733 }
3734
3735 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3736
3737 /// Generate the transformed value of the induction at offset StartValue (1.
3738 /// operand) + IV (2. operand) * StepValue (3. operand).
3739 void execute(VPTransformState &State) override;
3740
3741 /// Return the cost of this VPDerivedIVRecipe.
3743 VPCostContext &Ctx) const override {
3744 // TODO: Compute accurate cost after retiring the legacy cost model.
3745 return 0;
3746 }
3747
3748 Type *getScalarType() const { return getStartValue()->getType(); }
3749
3751 VPValue *getStepValue() const { return getOperand(2); }
3752
3753 /// Returns true if the recipe only uses the first lane of operand \p Op.
3754 bool usesFirstLaneOnly(const VPValue *Op) const override {
3756 "Op must be an operand of the recipe");
3757 return true;
3758 }
3759
3760protected:
3761#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3762 /// Print the recipe.
3763 void printRecipe(raw_ostream &O, const Twine &Indent,
3764 VPSlotTracker &SlotTracker) const override;
3765#endif
3766};
3767
3768/// A recipe for handling phi nodes of integer and floating-point inductions,
3769/// producing their scalar values.
3771 public VPUnrollPartAccessor<3> {
3772 Instruction::BinaryOps InductionOpcode;
3773
3774public:
3777 DebugLoc DL)
3778 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3779 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3780 InductionOpcode(Opcode) {}
3781
3783 VPValue *Step, VPValue *VF,
3786 IV, Step, VF, IndDesc.getInductionOpcode(),
3787 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3788 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3789 : FastMathFlags(),
3790 DL) {}
3791
3792 ~VPScalarIVStepsRecipe() override = default;
3793
3795 return new VPScalarIVStepsRecipe(
3796 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3798 getDebugLoc());
3799 }
3800
3801 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3802 /// this is only accurate after the VPlan has been unrolled.
3803 bool isPart0() const { return getUnrollPart(*this) == 0; }
3804
3805 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3806
3807 /// Generate the scalarized versions of the phi node as needed by their users.
3808 void execute(VPTransformState &State) override;
3809
3810 /// Return the cost of this VPScalarIVStepsRecipe.
3812 VPCostContext &Ctx) const override {
3813 // TODO: Compute accurate cost after retiring the legacy cost model.
3814 return 0;
3815 }
3816
3817 VPValue *getStepValue() const { return getOperand(1); }
3818
3819 /// Returns true if the recipe only uses the first lane of operand \p Op.
3820 bool usesFirstLaneOnly(const VPValue *Op) const override {
3822 "Op must be an operand of the recipe");
3823 return true;
3824 }
3825
3826protected:
3827#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3828 /// Print the recipe.
3829 void printRecipe(raw_ostream &O, const Twine &Indent,
3830 VPSlotTracker &SlotTracker) const override;
3831#endif
3832};
3833
3834/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3835/// types implementing VPPhiAccessors. Used by isa<> & co.
3837 static inline bool isPossible(const VPRecipeBase *f) {
3838 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3840 }
3841};
3842/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3843/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3844template <typename SrcTy>
3845struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3846
3848
3849 /// doCast is used by cast<>.
3850 static inline VPPhiAccessors *doCast(SrcTy R) {
3851 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3852 switch (R->getVPDefID()) {
3853 case VPDef::VPInstructionSC:
3854 return cast<VPPhi>(R);
3855 case VPDef::VPIRInstructionSC:
3856 return cast<VPIRPhi>(R);
3857 case VPDef::VPWidenPHISC:
3858 return cast<VPWidenPHIRecipe>(R);
3859 default:
3860 return cast<VPHeaderPHIRecipe>(R);
3861 }
3862 }());
3863 }
3864
3865 /// doCastIfPossible is used by dyn_cast<>.
3866 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3867 if (!Self::isPossible(f))
3868 return nullptr;
3869 return doCast(f);
3870 }
3871};
3872template <>
3875template <>
3878
3879/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3880/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3881namespace detail {
/// Down-cast \p R to the concrete recipe class selected by its VPDefID and
/// return the result converted to \p DstTy (presumably a (const) VPIRMetadata
/// pointer; confirm against the CastInfoVPIRMetadata callers).
/// Every VPDefID listed here must name a recipe class that inherits from
/// VPIRMetadata; any other ID reaches llvm_unreachable.
3882 template <typename DstTy, typename RecipeBasePtrTy>
3883 static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3884 switch (R->getVPDefID()) {
3885 case VPDef::VPInstructionSC:
3886 return cast<VPInstruction>(R);
3887 case VPDef::VPWidenSC:
3888 return cast<VPWidenRecipe>(R);
3889 case VPDef::VPWidenCastSC:
3890 return cast<VPWidenCastRecipe>(R);
3891 case VPDef::VPWidenIntrinsicSC:
// Restored: without this return the case fell through to the
// VPWidenCallRecipe cast below, which is the wrong concrete class.
3892 return cast<VPWidenIntrinsicRecipe>(R);
3893 case VPDef::VPWidenCallSC:
3894 return cast<VPWidenCallRecipe>(R);
3895 case VPDef::VPReplicateSC:
3896 return cast<VPReplicateRecipe>(R);
// Both interleave IDs share the common VPInterleaveBase class.
3897 case VPDef::VPInterleaveSC:
3898 case VPDef::VPInterleaveEVLSC:
3899 return cast<VPInterleaveBase>(R);
// All widened load/store IDs share the common VPWidenMemoryRecipe class.
3900 case VPDef::VPWidenLoadSC:
3901 case VPDef::VPWidenLoadEVLSC:
3902 case VPDef::VPWidenStoreSC:
3903 case VPDef::VPWidenStoreEVLSC:
3904 return cast<VPWidenMemoryRecipe>(R);
3905 default:
3906 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3907 }
3908 }
3909} // namespace detail
3910
3911/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3912/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3913template <typename DstTy, typename SrcTy>
3914struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3915 static inline bool isPossible(SrcTy R) {
3916 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3917 // also handled in castToVPIRMetadata.
3922 R);
3923 }
3924
3925 using RetTy = DstTy *;
3926
3927 /// doCast is used by cast<>.
3928 static inline RetTy doCast(SrcTy R) {
3930 }
3931
3932 /// doCastIfPossible is used by dyn_cast<>.
3933 static inline RetTy doCastIfPossible(SrcTy R) {
3934 if (!isPossible(R))
3935 return nullptr;
3936 return doCast(R);
3937 }
3938};
3939template <>
3942template <>
3945
3946/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3947/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3948/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3949class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3950 friend class VPlan;
3951
3952 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
/// Constructs a block named \p Name. If \p Recipe is non-null, it is added
/// to the new block via appendRecipe().
3953 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3954 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3955 if (Recipe)
3956 appendRecipe(Recipe);
3957 }
3958
3959public:
3961
3962protected:
3963 /// The VPRecipes held in the order of output instructions to generate.
3965
/// Constructor for derived block kinds: forwards the subclass ID \p BlockSC
/// and \p Name to VPBlockBase. No recipe is added.
3966 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3967 : VPBlockBase(BlockSC, Name.str()) {}
3968
3969public:
/// Empties the recipe list by removing recipes from the back, one at a time,
/// until none remain.
3970 ~VPBasicBlock() override {
3971 while (!Recipes.empty())
3972 Recipes.pop_back();
3973 }
3974
3975 /// Instruction iterators...
3980
3981 //===--------------------------------------------------------------------===//
3982 /// Recipe iterator methods
3983 ///
3984 inline iterator begin() { return Recipes.begin(); }
3985 inline const_iterator begin() const { return Recipes.begin(); }
3986 inline iterator end() { return Recipes.end(); }
3987 inline const_iterator end() const { return Recipes.end(); }
3988
3989 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3990 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3991 inline reverse_iterator rend() { return Recipes.rend(); }
3992 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3993
3994 inline size_t size() const { return Recipes.size(); }
3995 inline bool empty() const { return Recipes.empty(); }
3996 inline const VPRecipeBase &front() const { return Recipes.front(); }
3997 inline VPRecipeBase &front() { return Recipes.front(); }
3998 inline const VPRecipeBase &back() const { return Recipes.back(); }
3999 inline VPRecipeBase &back() { return Recipes.back(); }
4000
4001 /// Returns a reference to the list of recipes.
4003
4004 /// Returns a pointer-to-member designating the recipe list (Recipes) of a
/// VPBasicBlock. The VPRecipeBase pointer parameter is unnamed and unused.
4005 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4006 return &VPBasicBlock::Recipes;
4007 }
4008
4009 /// Method to support type inquiry through isa, cast, and dyn_cast.
/// Accepts blocks whose ID is either VPBasicBlockSC or VPIRBasicBlockSC.
4010 static inline bool classof(const VPBlockBase *V) {
4011 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4012 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4013 }
4014
/// Insert \p Recipe into this block's recipe list at \p InsertPt and record
/// this block as its parent. \p Recipe must be non-null and must not already
/// have a parent (both are asserted).
4015 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4016 assert(Recipe && "No recipe to append.");
4017 assert(!Recipe->Parent && "Recipe already in VPlan");
4018 Recipe->Parent = this;
4019 Recipes.insert(InsertPt, Recipe);
4020 }
4021
4022 /// Augment the existing recipes of a VPBasicBlock with an additional
4023 /// \p Recipe as the last recipe.
4024 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4025
4026 /// The method which generates the output IR instructions that correspond to
4027 /// this VPBasicBlock, thereby "executing" the VPlan.
4028 void execute(VPTransformState *State) override;
4029
4030 /// Return the cost of this VPBasicBlock.
4031 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4032
4033 /// Return the position of the first non-phi node recipe in the block.
4034 iterator getFirstNonPhi();
4035
4036 /// Returns an iterator range over the PHI-like recipes in the block.
4040
4041 /// Split current block at \p SplitAt by inserting a new block between the
4042 /// current block and its successors and moving all recipes starting at
4043 /// SplitAt to the new block. Returns the new block.
4044 VPBasicBlock *splitAt(iterator SplitAt);
4045
4046 VPRegionBlock *getEnclosingLoopRegion();
4047 const VPRegionBlock *getEnclosingLoopRegion() const;
4048
4049#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4050 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4051 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4052 ///
4053 /// Note that the numbering is applied to the whole VPlan, so printing
4054 /// individual blocks is consistent with the whole VPlan printing.
4055 void print(raw_ostream &O, const Twine &Indent,
4056 VPSlotTracker &SlotTracker) const override;
4057 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4058#endif
4059
4060 /// If the block has multiple successors, return the branch recipe terminating
4061 /// the block. If there are no or only a single successor, return nullptr;
4062 VPRecipeBase *getTerminator();
4063 const VPRecipeBase *getTerminator() const;
4064
4065 /// Returns true if the block is exiting its parent region.
4066 bool isExiting() const;
4067
4068 /// Clone the current block and its recipes, without updating the operands of
4069 /// the cloned recipes.
4070 VPBasicBlock *clone() override;
4071
4072 /// Returns the predecessor block at index \p Idx with the predecessors as per
4073 /// the corresponding plain CFG. If the block is an entry block to a region,
4074 /// the first predecessor is the single predecessor of a region, and the
4075 /// second predecessor is the exiting block of the region.
4076 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4077
4078protected:
4079 /// Execute the recipes in the IR basic block \p BB.
4080 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4081
4082 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4083 /// generated for this VPBB.
4084 void connectToPredecessors(VPTransformState &State);
4085
4086private:
4087 /// Create an IR BasicBlock to hold the output instructions generated by this
4088 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4089 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4090};
4091
4092inline const VPBasicBlock *
4094 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4095}
4096
4097 /// A special type of VPBasicBlock that wraps an existing IR basic block.
4098 /// Recipes of the block get added before the first non-phi instruction in the
4099 /// wrapped block.
4100 /// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4101 /// preheader block.
/// NOTE(review): VPlan in this file also keeps VPIRBasicBlocks for the scalar
/// header and for the exit blocks, so the note above may be outdated - confirm.
4102 class VPIRBasicBlock : public VPBasicBlock {
4103 friend class VPlan;
4104
/// The wrapped IR basic block, as passed to the constructor.
4105 BasicBlock *IRBB;
4106
4107 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
/// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
/// \p IRBB must be non-null: it is dereferenced to obtain that name.
4108 VPIRBasicBlock(BasicBlock *IRBB)
4109 : VPBasicBlock(VPIRBasicBlockSC,
4110 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4111 IRBB(IRBB) {}
4112
4113public:
4114 ~VPIRBasicBlock() override = default;
4115
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// only blocks whose ID is VPIRBasicBlockSC.
4116 static inline bool classof(const VPBlockBase *V) {
4117 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4118 }
4119
4120 /// The method which generates the output IR instructions that correspond to
4121 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4122 void execute(VPTransformState *State) override;
4123
4124 VPIRBasicBlock *clone() override;
4125
/// Return the wrapped IR basic block.
4126 BasicBlock *getIRBasicBlock() const { return IRBB; }
4127};
4128
4129/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4130/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4131/// A VPRegionBlock may indicate that its contents are to be replicated several
4132/// times. This is designed to support predicated scalarization, in which a
4133/// scalar if-then code structure needs to be generated VF * UF times. Having
4134/// this replication indicator helps to keep a single model for multiple
4135/// candidate VF's. The actual replication takes place only once the desired VF
4136/// and UF have been determined.
4137class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4138 friend class VPlan;
4139
4140 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4141 VPBlockBase *Entry;
4142
4143 /// Hold the Single Exiting block of the SESE region modelled by the
4144 /// VPRegionBlock.
4145 VPBlockBase *Exiting;
4146
4147 /// An indicator whether this region is to generate multiple replicated
4148 /// instances of output IR corresponding to its VPBlockBases.
4149 bool IsReplicator;
4150
4151 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
/// NOTE(review): the VPlan factory visible in this file is named
/// createLoopRegion, not createVPRegionBlock - confirm the intended name.
/// \p Entry and \p Exiting must both be non-null; each is dereferenced
/// unconditionally. \p Entry must have no predecessors and \p Exiting no
/// successors (asserted). Both blocks get this region as their parent.
4152 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4153 const std::string &Name = "", bool IsReplicator = false)
4154 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4155 IsReplicator(IsReplicator) {
4156 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4157 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4158 Entry->setParent(this);
4159 Exiting->setParent(this);
4160 }
/// Construct a region named \p Name whose entry and exiting blocks are both
/// null; they can be provided later through setEntry() and setExiting().
4161 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4162 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4163 IsReplicator(IsReplicator) {}
4164
4165public:
4166 ~VPRegionBlock() override = default;
4167
4168 /// Method to support type inquiry through isa, cast, and dyn_cast.
4169 static inline bool classof(const VPBlockBase *V) {
4170 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4171 }
4172
4173 const VPBlockBase *getEntry() const { return Entry; }
4174 VPBlockBase *getEntry() { return Entry; }
4175
4176 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4177 /// EntryBlock must have no predecessors.
4178 void setEntry(VPBlockBase *EntryBlock) {
4179 assert(EntryBlock->getPredecessors().empty() &&
4180 "Entry block cannot have predecessors.");
4181 Entry = EntryBlock;
4182 EntryBlock->setParent(this);
4183 }
4184
4185 const VPBlockBase *getExiting() const { return Exiting; }
4186 VPBlockBase *getExiting() { return Exiting; }
4187
4188 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4189 /// ExitingBlock must have no successors.
4190 void setExiting(VPBlockBase *ExitingBlock) {
4191 assert(ExitingBlock->getSuccessors().empty() &&
4192 "Exit block cannot have successors.");
4193 Exiting = ExitingBlock;
4194 ExitingBlock->setParent(this);
4195 }
4196
4197 /// Returns the pre-header VPBasicBlock of the loop region.
4199 assert(!isReplicator() && "should only get pre-header of loop regions");
4200 return getSinglePredecessor()->getExitingBasicBlock();
4201 }
4202
4203 /// An indicator whether this region is to generate multiple replicated
4204 /// instances of output IR corresponding to its VPBlockBases.
4205 bool isReplicator() const { return IsReplicator; }
4206
4207 /// The method which generates the output IR instructions that correspond to
4208 /// this VPRegionBlock, thereby "executing" the VPlan.
4209 void execute(VPTransformState *State) override;
4210
4211 /// Return the cost of this region.
4212 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4213
4214#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4215 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4216 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4217 /// consecutive numbers.
4218 ///
4219 /// Note that the numbering is applied to the whole VPlan, so printing
4220 /// individual regions is consistent with the whole VPlan printing.
4221 void print(raw_ostream &O, const Twine &Indent,
4222 VPSlotTracker &SlotTracker) const override;
4223 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4224#endif
4225
4226 /// Clone all blocks in the single-entry single-exit region of the block and
4227 /// their recipes without updating the operands of the cloned recipes.
4228 VPRegionBlock *clone() override;
4229
4230 /// Remove the current region from its VPlan, connecting its predecessor to
4231 /// its entry, and its exiting block to its successor.
4232 void dissolveToCFGLoop();
4233
4234 /// Returns the canonical induction recipe of the region.
4236 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4237 if (EntryVPBB->empty()) {
4238 // VPlan native path. TODO: Unify both code paths.
4239 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4240 }
4241 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4242 }
4244 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4245 }
4246
4247 /// Return the type of the canonical IV for loop regions.
4248 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4249 const Type *getCanonicalIVType() const {
4250 return getCanonicalIV()->getScalarType();
4251 }
4252};
4253
4255 return getParent()->getParent();
4256}
4257
4259 return getParent()->getParent();
4260}
4261
4262 /// VPlan models a candidate for vectorization, encoding various decisions taken
4263/// to produce efficient output IR, including which branches, basic-blocks and
4264/// output IR instructions to generate, and their cost. VPlan holds a
4265/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4266/// VPBasicBlock.
4267class VPlan {
4268 friend class VPlanPrinter;
4269 friend class VPSlotTracker;
4270
4271 /// VPBasicBlock corresponding to the original preheader. Used to place
4272 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4273 /// rest of VPlan execution.
4274 /// When this VPlan is used for the epilogue vector loop, the entry will be
4275 /// replaced by a new entry block created during skeleton creation.
4276 VPBasicBlock *Entry;
4277
4278 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4279 VPIRBasicBlock *ScalarHeader;
4280
4281 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4282 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4283 /// e.g. if the scalar epilogue always executes.
4285
4286 /// Holds the VFs applicable to this VPlan.
4288
4289 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4290 /// any UF.
4292
4293 /// Holds the name of the VPlan, for printing.
4294 std::string Name;
4295
4296 /// Represents the trip count of the original loop, for folding
4297 /// the tail.
4298 VPValue *TripCount = nullptr;
4299
4300 /// Represents the backedge taken count of the original loop, for folding
4301 /// the tail. It equals TripCount - 1.
4302 VPSymbolicValue *BackedgeTakenCount = nullptr;
4303
4304 /// Represents the vector trip count.
4305 VPSymbolicValue VectorTripCount;
4306
4307 /// Represents the vectorization factor of the loop.
4308 VPSymbolicValue VF;
4309
4310 /// Represents the loop-invariant VF * UF of the vector loop region.
4311 VPSymbolicValue VFxUF;
4312
4313 /// Contains all the external definitions created for this VPlan, as a mapping
4314 /// from IR Values to VPIRValues.
4316
4317 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4318 /// VPlan is destroyed.
4319 SmallVector<VPBlockBase *> CreatedBlocks;
4320
4321 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4322 /// wrapping the original header of the scalar loop.
/// Registers this plan on \p Entry via setPlan(). \p ScalarHeader must not
/// have any successors (asserted).
4323 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4324 : Entry(Entry), ScalarHeader(ScalarHeader) {
4325 Entry->setPlan(this);
4326 assert(ScalarHeader->getNumSuccessors() == 0 &&
4327 "scalar header must be a leaf node");
4328 }
4329
4330public:
4331 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4332 /// original preheader and scalar header of \p L, to be used as entry and
4333 /// scalar header blocks of the new VPlan.
4334 VPlan(Loop *L);
4335
4336 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4337 /// wrapping \p ScalarHeaderBB.
/// The entry block is created with the name "preheader". This constructor
/// takes no trip count; one can be provided later with setTripCount().
4338 VPlan(BasicBlock *ScalarHeaderBB) {
4339 setEntry(createVPBasicBlock("preheader"));
4340 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4341 }
4342
4344
4346 Entry = VPBB;
4347 VPBB->setPlan(this);
4348 }
4349
4350 /// Generate the IR code for this VPlan.
4351 void execute(VPTransformState *State);
4352
4353 /// Return the cost of this plan.
4355
4356 VPBasicBlock *getEntry() { return Entry; }
4357 const VPBasicBlock *getEntry() const { return Entry; }
4358
4359 /// Returns the preheader of the vector loop region, if one exists, or null
4360 /// otherwise.
4362 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4363 return VectorRegion
4364 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4365 : nullptr;
4366 }
4367
4368 /// Returns the VPRegionBlock of the vector loop.
4371
4372 /// Returns the 'middle' block of the plan, that is the block that selects
4373 /// whether to execute the scalar tail loop or the exit block from the loop
4374 /// latch. If there is an early exit from the vector loop, the middle block
4375 /// conceptually has the early exit block as third successor, split across 2
4376 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4377 /// tail loop or the exit block. If the scalar tail loop or exit block are
4378 /// known to always execute, the middle block may branch directly to that
4379 /// block. This function cannot be called once the vector loop region has been
4380 /// removed.
4382 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4383 assert(
4384 LoopRegion &&
4385 "cannot call the function after vector loop region has been removed");
4386 // The middle block is always the last successor of the region.
4387 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4388 }
4389
4391 return const_cast<VPlan *>(this)->getMiddleBlock();
4392 }
4393
4394 /// Return the VPBasicBlock for the preheader of the scalar loop.
4396 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4397 }
4398
4399 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4400 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4401
4402 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4403 /// the original scalar loop.
4404 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4405
4406 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4407 /// exit block.
4409
4410 /// Returns true if \p VPBB is an exit block.
4411 bool isExitBlock(VPBlockBase *VPBB);
4412
4413 /// The trip count of the original loop.
4415 assert(TripCount && "trip count needs to be set before accessing it");
4416 return TripCount;
4417 }
4418
4419 /// Set the trip count assuming it is currently null; if it is not - use
4420 /// resetTripCount().
/// \p NewTripCount must be non-null (asserted together with the requirement
/// that no trip count has been set yet).
4421 void setTripCount(VPValue *NewTripCount) {
4422 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4423 TripCount = NewTripCount;
4424 }
4425
4426 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4427 /// the original trip count have been replaced.
/// Asserts that a trip count is currently set, that \p NewTripCount is
/// non-null, and that the current trip count has no remaining users.
4428 void resetTripCount(VPValue *NewTripCount) {
4429 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4430 "TripCount must be set when resetting");
4431 TripCount = NewTripCount;
4432 }
4433
4434 /// The backedge taken count of the original loop.
4436 if (!BackedgeTakenCount)
4437 BackedgeTakenCount = new VPSymbolicValue();
4438 return BackedgeTakenCount;
4439 }
4440 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4441
4442 /// The vector trip count.
4443 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4444
4445 /// Returns the VF of the vector loop region.
4446 VPValue &getVF() { return VF; };
4447 const VPValue &getVF() const { return VF; };
4448
4449 /// Returns VF * UF of the vector loop region.
4450 VPValue &getVFxUF() { return VFxUF; }
4451
4454 }
4455
4456 void addVF(ElementCount VF) { VFs.insert(VF); }
4457
4459 assert(hasVF(VF) && "Cannot set VF not already in plan");
4460 VFs.clear();
4461 VFs.insert(VF);
4462 }
4463
4464 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
/// Returns true if at least one VF of this plan is scalable.
4465 bool hasScalableVF() const {
4466 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4467 }
4468
4469 /// Returns an iterator range over all VFs of the plan.
4472 return VFs;
4473 }
4474
/// Returns true if the plan has exactly one VF and that VF is scalar.
/// Asserts that the fixed VF of 1 is contained in the plan exactly when it is
/// the plan's only VF.
4475 bool hasScalarVFOnly() const {
4476 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4477 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4478 "Plan with scalar VF should only have a single VF");
4479 return HasScalarVFOnly;
4480 }
4481
/// Returns true if \p UF is applicable to this plan: either no UFs have been
/// recorded (the plan is valid for any UF) or \p UF is among the recorded UFs.
4482 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4483
/// Returns the single UF of this plan. Asserts that exactly one UF is
/// recorded.
4484 unsigned getUF() const {
4485 assert(UFs.size() == 1 && "Expected a single UF");
4486 return UFs[0];
4487 }
4488
/// Make \p UF the only UF of this plan, discarding any other recorded UFs.
/// \p UF must satisfy hasUF() (asserted).
4489 void setUF(unsigned UF) {
4490 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4491 UFs.clear();
4492 UFs.insert(UF);
4493 }
4494
4495 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4496 /// concrete UF.
4497 bool isUnrolled() const { return UFs.size() == 1; }
4498
4499 /// Return a string with the name of the plan and the applicable VFs and UFs.
4500 std::string getName() const;
4501
4502 void setName(const Twine &newName) { Name = newName.str(); }
4503
4504 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4505 /// yet) for \p V.
4507 assert(V && "Trying to get or add the VPIRValue of a null Value");
4508 auto [It, Inserted] = LiveIns.try_emplace(V);
4509 if (Inserted)
4510 It->second = new VPIRValue(V);
4511
4512 assert(isa<VPIRValue>(It->second) &&
4513 "Only VPIRValues should be in mapping");
4514 return It->second;
4515 }
4517 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4518 return getOrAddLiveIn(V->getValue());
4519 }
4520
4521 /// Return a VPIRValue wrapping i1 true.
4522 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4523
4524 /// Return a VPIRValue wrapping i1 false.
4525 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4526
4527 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4528 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4529 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4530 }
4531
4532 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4533 /// value.
4535 bool IsSigned = false) {
4536 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4537 }
4538
4539 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4541 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4542 }
4543
4544 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4545 /// otherwise.
4546 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4547
4548 /// Return the list of live-in VPValues available in the VPlan.
4549 auto getLiveIns() const { return LiveIns.values(); }
4550
4551#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4552 /// Print the live-ins of this VPlan to \p O.
4553 void printLiveIns(raw_ostream &O) const;
4554
4555 /// Print this VPlan to \p O.
4556 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4557
4558 /// Print this VPlan in DOT format to \p O.
4559 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4560
4561 /// Dump the plan to stderr (for debugging).
4562 LLVM_DUMP_METHOD void dump() const;
4563#endif
4564
4565 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4566 /// recipes to refer to the clones, and return it.
4568
4569 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4570 /// present. The returned block is owned by the VPlan and deleted once the
4571 /// VPlan is destroyed.
4573 VPRecipeBase *Recipe = nullptr) {
4574 auto *VPB = new VPBasicBlock(Name, Recipe);
4575 CreatedBlocks.push_back(VPB);
4576 return VPB;
4577 }
4578
4579 /// Create a new loop region with \p Name and entry and exiting blocks set
4580 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4581 /// owned by the VPlan and deleted once the VPlan is destroyed.
/// If \p Entry is non-null, \p Exiting must be non-null as well: the
/// VPRegionBlock constructor taking both blocks dereferences each of them.
/// If \p Entry is null, \p Exiting is ignored and an empty region is created.
4582 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4583 VPBlockBase *Entry = nullptr,
4584 VPBlockBase *Exiting = nullptr) {
4585 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4586 : new VPRegionBlock(Name);
// Record the region so the plan keeps track of the blocks it created.
4587 CreatedBlocks.push_back(VPB);
4588 return VPB;
4589 }
4590
4591 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4592 /// returned block is owned by the VPlan and deleted once the VPlan is
4593 /// destroyed.
4595 const std::string &Name = "") {
4596 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4597 CreatedBlocks.push_back(VPB);
4598 return VPB;
4599 }
4600
4601 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4602 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4603 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4605
4606 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4607 /// instructions in \p IRBB, except its terminator which is managed by the
4608 /// successors of the block in VPlan. The returned block is owned by the VPlan
4609 /// and deleted once the VPlan is destroyed.
4611
4612 /// Returns true if the VPlan is based on a loop with an early exit. That is
4613 /// the case if the VPlan has either more than one exit block or a single exit
4614 /// block with multiple predecessors (one for the exit via the latch and one
4615 /// via the other early exit).
4616 bool hasEarlyExit() const {
4617 return count_if(ExitBlocks,
4618 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4619 1 ||
4620 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4621 }
4622
4623 /// Returns true if the scalar tail may execute after the vector loop. Note
4624 /// that this relies on unneeded branches to the scalar tail loop being
4625 /// removed.
4626 bool hasScalarTail() const {
4627 return !(!getScalarPreheader()->hasPredecessors() ||
4629 }
4630};
4631
4632#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream \p Plan to \p OS by calling VPlan::print() and return \p OS. Only
/// declared when NDEBUG is not defined or LLVM_ENABLE_DUMP is defined.
4633 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4634 Plan.print(OS);
4635 return OS;
4636 }
4637#endif
4638
4639} // end namespace llvm
4640
4641#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3600
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3594
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3949
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:3977
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4024
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:3979
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3976
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4002
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3960
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3966
iterator end()
Definition VPlan.h:3986
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3984
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:3978
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4037
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:782
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:228
~VPBasicBlock() override
Definition VPlan.h:3970
const_reverse_iterator rbegin() const
Definition VPlan.h:3990
reverse_iterator rend()
Definition VPlan.h:3991
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3964
VPRecipeBase & back()
Definition VPlan.h:3999
const VPRecipeBase & front() const
Definition VPlan.h:3996
const_iterator begin() const
Definition VPlan.h:3985
VPRecipeBase & front()
Definition VPlan.h:3997
const VPRecipeBase & back() const
Definition VPlan.h:3998
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4015
bool empty() const
Definition VPlan.h:3995
const_iterator end() const
Definition VPlan.h:3987
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4010
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4005
reverse_iterator rbegin()
Definition VPlan.h:3989
friend class VPlan
Definition VPlan.h:3950
size_t size() const
Definition VPlan.h:3994
const_reverse_iterator rend() const
Definition VPlan.h:3992
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2528
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2533
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2523
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2544
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2553
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2510
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2505
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2539
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2519
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:657
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:220
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:192
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:212
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:178
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3024
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3008
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3032
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3005
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3535
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
Definition VPlan.h:3537
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3562
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3542
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3569
VPIRValue * getStartValue() const
Returns the start value of the canonical induction.
Definition VPlan.h:3556
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3559
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3550
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3576
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:332
VPDef(const unsigned char SC)
Definition VPlanValue.h:412
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPIRValue * getStartValue() const
Definition VPlan.h:3750
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3742
VPValue * getStepValue() const
Definition VPlan.h:3751
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3714
Type * getScalarType() const
Definition VPlan.h:3748
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3730
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3754
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3722
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3650
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3631
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3637
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3643
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3626
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3510
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3515
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3501
const SCEV * getSCEV() const
Definition VPlan.h:3521
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3506
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3159
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3141
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3123
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3111
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3097
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3089
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3093
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3153
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3091
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2027
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2040
static bool classof(const VPValue *V)
Definition VPlan.h:2037
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2063
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2068
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2052
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2060
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2033
VPValue * getStartValue() const
Definition VPlan.h:2055
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2072
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2022
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1786
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1803
unsigned getOpcode() const
Definition VPlan.h:1799
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1780
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4102
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:457
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4126
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4116
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4103
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:482
Class to record and manage LLVM IR flags.
Definition VPlan.h:608
FastMathFlagsTy FMFs
Definition VPlan.h:679
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:739
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:731
WrapFlagsTy WrapFlags
Definition VPlan.h:673
CmpInst::Predicate CmpPredicate
Definition VPlan.h:672
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:725
GEPNoWrapFlags GEPFlags
Definition VPlan.h:677
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:857
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:674
CmpInst::Predicate getPredicate() const
Definition VPlan.h:834
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:864
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:748
ExactFlagsTy ExactFlags
Definition VPlan.h:676
bool hasNoSignedWrap() const
Definition VPlan.h:883
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:894
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:734
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:737
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:742
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:722
bool isNonNeg() const
Definition VPlan.h:866
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:849
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:852
DisjointFlagsTy DisjointFlags
Definition VPlan.h:675
unsigned AllFlags
Definition VPlan.h:681
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:840
bool hasNoUnsignedWrap() const
Definition VPlan.h:872
FCmpFlagsTy FCmpFlags
Definition VPlan.h:680
NonNegFlagsTy NonNegFlags
Definition VPlan.h:678
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:758
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:794
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:745
VPIRFlags(Instruction &I)
Definition VPlan.h:687
Instruction & getInstruction() const
Definition VPlan.h:1457
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1465
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1444
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1471
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1459
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1432
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:980
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1016
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:988
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1000
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1271
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1312
static bool classof(const VPUser *R)
Definition VPlan.h:1297
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1279
Type * getResultType() const
Definition VPlan.h:1318
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1301
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1034
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1183
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1136
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1081
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1126
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1139
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1078
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1130
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1073
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1070
@ VScale
Returns the value for vscale.
Definition VPlan.h:1141
@ CanonicalIVIncrementForPart
Definition VPlan.h:1054
bool hasResult() const
Definition VPlan.h:1207
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1248
unsigned getOpcode() const
Definition VPlan.h:1191
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1251
friend class VPlanSlp
Definition VPlan.h:1035
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2639
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2645
static bool classof(const VPUser *U)
Definition VPlan.h:2621
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2588
Instruction * getInsertPos() const
Definition VPlan.h:2643
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2616
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2641
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2633
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2662
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2627
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2715
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2743
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2737
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2750
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2730
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2717
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2673
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2700
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2683
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2694
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2675
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1330
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1352
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1347
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4093
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1372
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1339
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1357
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1361
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3216
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3198
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3209
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3194
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4254
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:211
friend class VPDef
Definition VPlanValue.h:213
VPRecipeValue(VPDef *Def, Value *UV=nullptr)
Definition VPlan.cpp:139
friend class VPValue
Definition VPlanValue.h:212
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2900
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2879
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2903
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2890
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2466
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2452
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2431
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2445
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2478
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2460
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2469
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2483
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2420
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2475
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2463
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2766
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2775
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2842
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2811
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2826
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2853
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2855
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2838
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2789
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2840
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2796
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2844
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2851
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2846
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2805
static bool classof(const VPUser *U)
Definition VPlan.h:2816
static bool classof(const VPValue *VPV)
Definition VPlan.h:2821
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2860
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4137
const VPBlockBase * getEntry() const
Definition VPlan.h:4173
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4248
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4205
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4190
VPBlockBase * getExiting()
Definition VPlan.h:4186
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4235
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4178
const Type * getCanonicalIVType() const
Definition VPlan.h:4249
const VPBlockBase * getExiting() const
Definition VPlan.h:4185
VPBlockBase * getEntry()
Definition VPlan.h:4174
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4243
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4198
friend class VPlan
Definition VPlan.h:4138
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4169
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2922
bool isSingleScalar() const
Definition VPlan.h:2963
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2930
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2975
bool isPredicated() const
Definition VPlan.h:2965
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2944
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2968
unsigned getOpcode() const
Definition VPlan.h:2992
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:2987
VPValue * getStepValue() const
Definition VPlan.h:3817
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3811
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3782
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3803
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3794
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3775
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3820
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:594
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:597
static bool classof(const VPUser *U)
Definition VPlan.h:586
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:968
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:229
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1428
operand_range operands()
Definition VPlanValue.h:297
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:273
unsigned getNumOperands() const
Definition VPlanValue.h:267
operand_iterator op_end()
Definition VPlanValue.h:295
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:268
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:248
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:291
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:290
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:45
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:133
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:119
friend class VPRecipeValue
Definition VPlanValue.h:51
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:72
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:173
unsigned getNumUsers() const
Definition VPlanValue.h:105
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1902
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1923
const VPValue * getVFValue() const
Definition VPlan.h:1898
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1916
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1909
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1887
Type * getSourceElementType() const
Definition VPlan.h:1957
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1959
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1966
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1944
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:1982
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1973
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1720
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1727
const_operand_range args() const
Definition VPlan.h:1760
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1741
operand_range args()
Definition VPlan.h:1759
Function * getCalledScalarFunction() const
Definition VPlan.h:1755
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3686
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3673
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3668
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1570
Instruction::CastOps getOpcode() const
Definition VPlan.h:1606
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1609
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1578
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1591
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1850
Type * getSourceElementType() const
Definition VPlan.h:1855
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1858
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1842
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1828
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2152
static bool classof(const VPValue *V)
Definition VPlan.h:2103
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2122
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2137
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2115
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2130
PHINode * getPHINode() const
Definition VPlan.h:2132
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2091
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2118
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2135
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2144
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2098
const VPValue * getVFValue() const
Definition VPlan.h:2125
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2108
const VPValue * getStepValue() const
Definition VPlan.h:2119
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2213
const TruncInst * getTruncInst() const
Definition VPlan.h:2229
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2207
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2199
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2173
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2228
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2182
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2244
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2224
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2237
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1620
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1651
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1691
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1700
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1637
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1706
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1672
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1703
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1694
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3247
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3244
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3287
static bool classof(const VPUser *U)
Definition VPlan.h:3281
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3310
Instruction & Ingredient
Definition VPlan.h:3235
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3270
Instruction & getIngredient() const
Definition VPlan.h:3318
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3241
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3274
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3301
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3238
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3297
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3257
void setMask(VPValue *Mask)
Definition VPlan.h:3249
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3307
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3294
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3291
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2338
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2309
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2316
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2271
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2280
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2261
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1522
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1536
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1561
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1526
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1551
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4267
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4546
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1117
friend class VPSlotTracker
Definition VPlan.h:4269
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1093
bool hasVF(ElementCount VF) const
Definition VPlan.h:4464
LLVMContext & getContext() const
Definition VPlan.h:4452
VPBasicBlock * getEntry()
Definition VPlan.h:4356
void setName(const Twine &newName)
Definition VPlan.h:4502
bool hasScalableVF() const
Definition VPlan.h:4465
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4450
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4446
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4414
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4435
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4471
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:901
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:879
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4516
const VPValue & getVF() const
Definition VPlan.h:4447
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:909
const VPBasicBlock * getEntry() const
Definition VPlan.h:4357
friend class VPlanPrinter
Definition VPlan.h:4268
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4525
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4540
unsigned getUF() const
Definition VPlan.h:4484
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4594
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1228
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4549
bool hasUF(unsigned UF) const
Definition VPlan.h:4482
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4404
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4443
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4440
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4506
void setVF(ElementCount VF)
Definition VPlan.h:4458
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4497
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1022
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4616
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1004
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4534
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4390
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4421
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4428
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4381
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4345
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4572
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1234
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4522
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4582
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1123
bool hasScalarVFOnly() const
Definition VPlan.h:4475
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4395
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:916
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1076
void addVF(ElementCount VF)
Definition VPlan.h:4456
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4400
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1038
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4361
void setUF(unsigned UF)
Definition VPlan.h:4489
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4626
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1164
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4338
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4528
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2483
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3883
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2392
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2530
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2009
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1770
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2390
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3914
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3928
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3933
static bool isPossible(SrcTy R)
Definition VPlan.h:3915
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3845
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3866
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3847
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3850
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3837
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2384
Possible variants of a reduction.
Definition VPlan.h:2382
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2387
unsigned VFScaleFactor
Definition VPlan.h:2388
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2353
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2365
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2345
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:639
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:644
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:634
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:627
PHINode & getIRPhi()
Definition VPlan.h:1503
VPIRPhi(PHINode &PN)
Definition VPlan.h:1496
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1498
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1514
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:184
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:137
static bool classof(const VPUser *U)
Definition VPlan.h:1390
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1405
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1420
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1387
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1400
static bool classof(const VPValue *V)
Definition VPlan.h:1395
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:922
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:928
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:923
static bool classof(const VPValue *V)
Definition VPlan.h:947
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:954
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:942
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:202
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3366
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3379
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3367
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3389
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3324
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3346
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3325
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3334
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3450
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3462
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3451
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3475
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3465
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3407
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3425
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3416
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3431
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3408