LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
37#include "llvm/IR/DebugLoc.h"
38#include "llvm/IR/FMF.h"
39#include "llvm/IR/Operator.h"
42#include <cassert>
43#include <cstddef>
44#include <functional>
45#include <string>
46#include <utility>
47#include <variant>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
54class IRBuilderBase;
55struct VPTransformState;
56class raw_ostream;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPBuilder;
62class VPDominatorTree;
63class VPRegionBlock;
64class VPlan;
65class VPLane;
67class VPlanSlp;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
80/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
82 friend class VPBlockUtils;
83
84 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
85
86 /// An optional name for the block.
87 std::string Name;
88
89 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
90 /// it is a topmost VPBlockBase.
91 VPRegionBlock *Parent = nullptr;
92
93 /// List of predecessor blocks.
95
96 /// List of successor blocks.
98
99 /// VPlan containing the block. Can only be set on the entry block of the
100 /// plan.
101 VPlan *Plan = nullptr;
102
103 /// Add \p Successor as the last successor to this block.
104 void appendSuccessor(VPBlockBase *Successor) {
105 assert(Successor && "Cannot add nullptr successor!");
106 Successors.push_back(Successor);
107 }
108
109 /// Add \p Predecessor as the last predecessor to this block.
110 void appendPredecessor(VPBlockBase *Predecessor) {
111 assert(Predecessor && "Cannot add nullptr predecessor!");
112 Predecessors.push_back(Predecessor);
113 }
114
115 /// Remove \p Predecessor from the predecessors of this block.
116 void removePredecessor(VPBlockBase *Predecessor) {
117 auto Pos = find(Predecessors, Predecessor);
118 assert(Pos && "Predecessor does not exist");
119 Predecessors.erase(Pos);
120 }
121
122 /// Remove \p Successor from the successors of this block.
123 void removeSuccessor(VPBlockBase *Successor) {
124 auto Pos = find(Successors, Successor);
125 assert(Pos && "Successor does not exist");
126 Successors.erase(Pos);
127 }
128
129 /// This function replaces one predecessor with another, useful when
130 /// trying to replace an old block in the CFG with a new one.
131 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
132 auto I = find(Predecessors, Old);
133 assert(I != Predecessors.end());
134 assert(Old->getParent() == New->getParent() &&
135 "replaced predecessor must have the same parent");
136 *I = New;
137 }
138
139 /// This function replaces one successor with another, useful when
140 /// trying to replace an old block in the CFG with a new one.
141 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
142 auto I = find(Successors, Old);
143 assert(I != Successors.end());
144 assert(Old->getParent() == New->getParent() &&
145 "replaced successor must have the same parent");
146 *I = New;
147 }
148
149protected:
150 VPBlockBase(const unsigned char SC, const std::string &N)
151 : SubclassID(SC), Name(N) {}
152
153public:
154 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
155 /// that are actually instantiated. Values of this enumeration are kept in the
156 /// SubclassID field of the VPBlockBase objects. They are used for concrete
157 /// type identification.
158 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
159
161
162 virtual ~VPBlockBase() = default;
163
164 const std::string &getName() const { return Name; }
165
166 void setName(const Twine &newName) { Name = newName.str(); }
167
168 /// \return an ID for the concrete type of this object.
169 /// This is used to implement the classof checks. This should not be used
170 /// for any other purpose, as the values may change as LLVM evolves.
171 unsigned getVPBlockID() const { return SubclassID; }
172
173 VPRegionBlock *getParent() { return Parent; }
174 const VPRegionBlock *getParent() const { return Parent; }
175
176 /// \return A pointer to the plan containing the current block.
177 VPlan *getPlan();
178 const VPlan *getPlan() const;
179
180 /// Sets the pointer of the plan containing the block. The block must be the
181 /// entry block into the VPlan.
182 void setPlan(VPlan *ParentPlan);
183
184 void setParent(VPRegionBlock *P) { Parent = P; }
185
186 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
187 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
188 /// VPBlockBase is a VPBasicBlock, it is returned.
189 const VPBasicBlock *getEntryBasicBlock() const;
190 VPBasicBlock *getEntryBasicBlock();
191
192 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
193 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
194 /// VPBlockBase is a VPBasicBlock, it is returned.
195 const VPBasicBlock *getExitingBasicBlock() const;
196 VPBasicBlock *getExitingBasicBlock();
197
198 const VPBlocksTy &getSuccessors() const { return Successors; }
199 VPBlocksTy &getSuccessors() { return Successors; }
200
203
204 const VPBlocksTy &getPredecessors() const { return Predecessors; }
205 VPBlocksTy &getPredecessors() { return Predecessors; }
206
207 /// \return the successor of this VPBlockBase if it has a single successor.
208 /// Otherwise return a null pointer.
210 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
211 }
212
213 /// \return the predecessor of this VPBlockBase if it has a single
214 /// predecessor. Otherwise return a null pointer.
216 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
217 }
218
219 size_t getNumSuccessors() const { return Successors.size(); }
220 size_t getNumPredecessors() const { return Predecessors.size(); }
221
222 /// Returns true if this block has any predecessors.
223 bool hasPredecessors() const { return !Predecessors.empty(); }
224
225 /// An Enclosing Block of a block B is any block containing B, including B
226 /// itself. \return the closest enclosing block starting from "this", which
227 /// has successors. \return the root enclosing block if all enclosing blocks
228 /// have no successors.
229 VPBlockBase *getEnclosingBlockWithSuccessors();
230
231 /// \return the closest enclosing block starting from "this", which has
232 /// predecessors. \return the root enclosing block if all enclosing blocks
233 /// have no predecessors.
234 VPBlockBase *getEnclosingBlockWithPredecessors();
235
236 /// \return the successors either attached directly to this VPBlockBase or, if
237 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
238 /// successors of its own, search recursively for the first enclosing
239 /// VPRegionBlock that has successors and return them. If no such
240 /// VPRegionBlock exists, return the (empty) successors of the topmost
241 /// VPBlockBase reached.
243 return getEnclosingBlockWithSuccessors()->getSuccessors();
244 }
245
246 /// \return the hierarchical successor of this VPBlockBase if it has a single
247 /// hierarchical successor. Otherwise return a null pointer.
249 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
250 }
251
252 /// \return the predecessors either attached directly to this VPBlockBase or,
253 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
254 /// predecessors of its own, search recursively for the first enclosing
255 /// VPRegionBlock that has predecessors and return them. If no such
256 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
257 /// VPBlockBase reached.
259 return getEnclosingBlockWithPredecessors()->getPredecessors();
260 }
261
262 /// \return the hierarchical predecessor of this VPBlockBase if it has a
263 /// single hierarchical predecessor. Otherwise return a null pointer.
267
268 /// Set a given VPBlockBase \p Successor as the single successor of this
269 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
270 /// This VPBlockBase must have no successors.
272 assert(Successors.empty() && "Setting one successor when others exist.");
273 assert(Successor->getParent() == getParent() &&
274 "connected blocks must have the same parent");
275 appendSuccessor(Successor);
276 }
277
278 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
279 /// successors of this VPBlockBase. This VPBlockBase is not added as
280 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
281 /// successors.
282 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
283 assert(Successors.empty() && "Setting two successors when others exist.");
284 appendSuccessor(IfTrue);
285 appendSuccessor(IfFalse);
286 }
287
288 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
289 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
290 /// as successor of any VPBasicBlock in \p NewPreds.
292 assert(Predecessors.empty() && "Block predecessors already set.");
293 for (auto *Pred : NewPreds)
294 appendPredecessor(Pred);
295 }
296
297 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
298 /// This VPBlockBase must have no successors. This VPBlockBase is not added
299 /// as predecessor of any VPBasicBlock in \p NewSuccs.
301 assert(Successors.empty() && "Block successors already set.");
302 for (auto *Succ : NewSuccs)
303 appendSuccessor(Succ);
304 }
305
306 /// Remove all the predecessors of this block.
307 void clearPredecessors() { Predecessors.clear(); }
308
309 /// Remove all the successors of this block.
310 void clearSuccessors() { Successors.clear(); }
311
312 /// Swap predecessors of the block. The block must have exactly 2
313 /// predecessors.
315 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
316 std::swap(Predecessors[0], Predecessors[1]);
317 }
318
319 /// Swap successors of the block. The block must have exactly 2 successors.
320 // TODO: This should be part of introducing conditional branch recipes rather
321 // than being independent.
323 assert(Successors.size() == 2 && "must have 2 successors to swap");
324 std::swap(Successors[0], Successors[1]);
325 }
326
327 /// Returns the index for \p Pred in the block's predecessors list.
328 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
329 assert(count(Predecessors, Pred) == 1 &&
330 "must have Pred exactly once in Predecessors");
331 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
332 }
333
334 /// Returns the index for \p Succ in the block's successor list.
335 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
336 assert(count(Successors, Succ) == 1 &&
337 "must have Succ exactly once in Successors");
338 return std::distance(Successors.begin(), find(Successors, Succ));
339 }
340
341 /// The method which generates the output IR that correspond to this
342 /// VPBlockBase, thereby "executing" the VPlan.
343 virtual void execute(VPTransformState *State) = 0;
344
345 /// Return the cost of the block.
347
348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
349 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
350 OS << getName();
351 }
352
353 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
354 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
355 /// consecutive numbers.
356 ///
357 /// Note that the numbering is applied to the whole VPlan, so printing
358 /// individual blocks is consistent with the whole VPlan printing.
359 virtual void print(raw_ostream &O, const Twine &Indent,
360 VPSlotTracker &SlotTracker) const = 0;
361
362 /// Print plain-text dump of this VPBlockBase to \p O.
363 void print(raw_ostream &O) const;
364
365 /// Print the successors of this block to \p O, prefixing all lines with \p
366 /// Indent.
367 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
368
369 /// Dump this VPBlockBase to dbgs().
370 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
371#endif
372
373 /// Clone the current block and its recipes without updating the operands of
374 /// the cloned recipes, including all blocks in the single-entry single-exit
375 /// region for VPRegionBlocks.
376 virtual VPBlockBase *clone() = 0;
377};
378
379/// VPRecipeBase is a base class modeling a sequence of one or more output IR
380/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
381/// and is responsible for deleting its defined values. Single-value
382 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
383/// VPRecipeBase and VPValue separately.
385 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
386 public VPDef,
387 public VPUser {
388 friend VPBasicBlock;
389 friend class VPBlockUtils;
390
391 /// Each VPRecipe belongs to a single VPBasicBlock.
392 VPBasicBlock *Parent = nullptr;
393
394 /// The debug location for the recipe.
395 DebugLoc DL;
396
397public:
398 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
400 : VPDef(SC), VPUser(Operands), DL(DL) {}
401
402 ~VPRecipeBase() override = default;
403
404 /// Clone the current recipe.
405 virtual VPRecipeBase *clone() = 0;
406
407 /// \return the VPBasicBlock which this VPRecipe belongs to.
408 VPBasicBlock *getParent() { return Parent; }
409 const VPBasicBlock *getParent() const { return Parent; }
410
411 /// \return the VPRegionBlock which the recipe belongs to.
412 VPRegionBlock *getRegion();
413 const VPRegionBlock *getRegion() const;
414
415 /// The method which generates the output IR instructions that correspond to
416 /// this VPRecipe, thereby "executing" the VPlan.
417 virtual void execute(VPTransformState &State) = 0;
418
419 /// Return the cost of this recipe, taking into account if the cost
420 /// computation should be skipped and the ForceTargetInstructionCost flag.
421 /// Also takes care of printing the cost for debugging.
423
424 /// Insert an unlinked recipe into a basic block immediately before
425 /// the specified recipe.
426 void insertBefore(VPRecipeBase *InsertPos);
427 /// Insert an unlinked recipe into \p BB immediately before the insertion
428 /// point \p IP;
429 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
430
431 /// Insert an unlinked Recipe into a basic block immediately after
432 /// the specified Recipe.
433 void insertAfter(VPRecipeBase *InsertPos);
434
435 /// Unlink this recipe from its current VPBasicBlock and insert it into
436 /// the VPBasicBlock that MovePos lives in, right after MovePos.
437 void moveAfter(VPRecipeBase *MovePos);
438
439 /// Unlink this recipe and insert into BB before I.
440 ///
441 /// \pre I is a valid iterator into BB.
442 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
443
444 /// This method unlinks 'this' from the containing basic block, but does not
445 /// delete it.
446 void removeFromParent();
447
448 /// This method unlinks 'this' from the containing basic block and deletes it.
449 ///
450 /// \returns an iterator pointing to the element after the erased one
452
453 /// Method to support type inquiry through isa, cast, and dyn_cast.
454 static inline bool classof(const VPDef *D) {
455 // All VPDefs are also VPRecipeBases.
456 return true;
457 }
458
459 static inline bool classof(const VPUser *U) { return true; }
460
461 /// Returns true if the recipe may have side-effects.
462 bool mayHaveSideEffects() const;
463
464 /// Returns true for PHI-like recipes.
465 bool isPhi() const;
466
467 /// Returns true if the recipe may read from memory.
468 bool mayReadFromMemory() const;
469
470 /// Returns true if the recipe may write to memory.
471 bool mayWriteToMemory() const;
472
473 /// Returns true if the recipe may read from or write to memory.
474 bool mayReadOrWriteMemory() const {
476 }
477
478 /// Returns the debug location of the recipe.
479 DebugLoc getDebugLoc() const { return DL; }
480
481 /// Return true if the recipe is a scalar cast.
482 bool isScalarCast() const;
483
484 /// Set the recipe's debug location to \p NewDL.
485 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
486
487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
488 /// Print the recipe, delegating to printRecipe().
489 void print(raw_ostream &O, const Twine &Indent,
490 VPSlotTracker &SlotTracker) const override final;
491#endif
492
493protected:
494 /// Compute the cost of this recipe either using a recipe's specialized
495 /// implementation or using the legacy cost model and the underlying
496 /// instructions.
497 virtual InstructionCost computeCost(ElementCount VF,
498 VPCostContext &Ctx) const;
499
500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
501 /// Each concrete VPRecipe prints itself, without printing common information,
502 /// like debug info or metadata.
503 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
504 VPSlotTracker &SlotTracker) const = 0;
505#endif
506};
507
// Expands, inside a recipe class, to the classof() overloads used for type
// inquiry. Each overload compares the recipe's VPDef ID against VPDefID; the
// VPValue and VPUser overloads first look up the recipe and yield false when
// there is none.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPRecipeBase *Rec) {                        \
    return Rec->getVPDefID() == VPDefID;                                       \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *Rec) {                   \
    return Rec->getVPDefID() == VPDefID;                                       \
  }                                                                            \
  static inline bool classof(const VPDef *Def) {                               \
    return Def->getVPDefID() == VPDefID;                                       \
  }                                                                            \
  static inline bool classof(const VPUser *Usr) {                              \
    auto *Rec = dyn_cast<VPRecipeBase>(Usr);                                   \
    return Rec && Rec->getVPDefID() == VPDefID;                                \
  }                                                                            \
  static inline bool classof(const VPValue *Val) {                             \
    auto *Rec = Val->getDefiningRecipe();                                      \
    return Rec && Rec->getVPDefID() == VPDefID;                                \
  }
527
528/// VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or
529/// more output IR that define a single result VPValue.
530/// Note that VPRecipeBase must be inherited from before VPValue.
531class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
532public:
533 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
535 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
536
537 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
539 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
540
541 static inline bool classof(const VPRecipeBase *R) {
542 switch (R->getVPDefID()) {
543 case VPRecipeBase::VPDerivedIVSC:
544 case VPRecipeBase::VPEVLBasedIVPHISC:
545 case VPRecipeBase::VPExpandSCEVSC:
546 case VPRecipeBase::VPExpressionSC:
547 case VPRecipeBase::VPInstructionSC:
548 case VPRecipeBase::VPReductionEVLSC:
549 case VPRecipeBase::VPReductionSC:
550 case VPRecipeBase::VPReplicateSC:
551 case VPRecipeBase::VPScalarIVStepsSC:
552 case VPRecipeBase::VPVectorPointerSC:
553 case VPRecipeBase::VPVectorEndPointerSC:
554 case VPRecipeBase::VPWidenCallSC:
555 case VPRecipeBase::VPWidenCanonicalIVSC:
556 case VPRecipeBase::VPWidenCastSC:
557 case VPRecipeBase::VPWidenGEPSC:
558 case VPRecipeBase::VPWidenIntrinsicSC:
559 case VPRecipeBase::VPWidenSC:
560 case VPRecipeBase::VPWidenSelectSC:
561 case VPRecipeBase::VPBlendSC:
562 case VPRecipeBase::VPPredInstPHISC:
563 case VPRecipeBase::VPCanonicalIVPHISC:
564 case VPRecipeBase::VPActiveLaneMaskPHISC:
565 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
566 case VPRecipeBase::VPWidenPHISC:
567 case VPRecipeBase::VPWidenIntOrFpInductionSC:
568 case VPRecipeBase::VPWidenPointerInductionSC:
569 case VPRecipeBase::VPReductionPHISC:
570 return true;
571 case VPRecipeBase::VPBranchOnMaskSC:
572 case VPRecipeBase::VPInterleaveEVLSC:
573 case VPRecipeBase::VPInterleaveSC:
574 case VPRecipeBase::VPIRInstructionSC:
575 case VPRecipeBase::VPWidenLoadEVLSC:
576 case VPRecipeBase::VPWidenLoadSC:
577 case VPRecipeBase::VPWidenStoreEVLSC:
578 case VPRecipeBase::VPWidenStoreSC:
579 case VPRecipeBase::VPHistogramSC:
580 // TODO: Widened stores don't define a value, but widened loads do. Split
581 // the recipes to be able to make widened loads VPSingleDefRecipes.
582 return false;
583 }
584 llvm_unreachable("Unhandled VPDefID");
585 }
586
587 static inline bool classof(const VPUser *U) {
588 auto *R = dyn_cast<VPRecipeBase>(U);
589 return R && classof(R);
590 }
591
592 VPSingleDefRecipe *clone() override = 0;
593
594 /// Returns the underlying instruction.
601
602#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
603 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
605#endif
606};
607
608/// Class to record and manage LLVM IR flags.
610 enum class OperationType : unsigned char {
611 Cmp,
612 FCmp,
613 OverflowingBinOp,
614 Trunc,
615 DisjointOp,
616 PossiblyExactOp,
617 GEPOp,
618 FPMathOp,
619 NonNegOp,
620 Other
621 };
622
623public:
624 struct WrapFlagsTy {
625 char HasNUW : 1;
626 char HasNSW : 1;
627
629 };
630
632 char HasNUW : 1;
633 char HasNSW : 1;
634
636 };
637
642
644 char NonNeg : 1;
645 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
646 };
647
648private:
649 struct ExactFlagsTy {
650 char IsExact : 1;
651 };
652 struct FastMathFlagsTy {
653 char AllowReassoc : 1;
654 char NoNaNs : 1;
655 char NoInfs : 1;
656 char NoSignedZeros : 1;
657 char AllowReciprocal : 1;
658 char AllowContract : 1;
659 char ApproxFunc : 1;
660
661 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
662 };
663 /// Holds both the predicate and fast-math flags for floating-point
664 /// comparisons.
665 struct FCmpFlagsTy {
667 FastMathFlagsTy FMFs;
668 };
669
670 OperationType OpType;
671
672 union {
677 ExactFlagsTy ExactFlags;
680 FastMathFlagsTy FMFs;
681 FCmpFlagsTy FCmpFlags;
682 unsigned AllFlags;
683 };
684
685public:
686 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
687
689 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
690 OpType = OperationType::FCmp;
691 FCmpFlags.Pred = FCmp->getPredicate();
692 FCmpFlags.FMFs = FCmp->getFastMathFlags();
693 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
694 OpType = OperationType::Cmp;
695 CmpPredicate = Op->getPredicate();
696 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
697 OpType = OperationType::DisjointOp;
698 DisjointFlags.IsDisjoint = Op->isDisjoint();
699 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
700 OpType = OperationType::OverflowingBinOp;
701 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
702 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
703 OpType = OperationType::Trunc;
704 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
705 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
706 OpType = OperationType::PossiblyExactOp;
707 ExactFlags.IsExact = Op->isExact();
708 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
709 OpType = OperationType::GEPOp;
710 GEPFlags = GEP->getNoWrapFlags();
711 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
712 OpType = OperationType::NonNegOp;
713 NonNegFlags.NonNeg = PNNI->hasNonNeg();
714 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
715 OpType = OperationType::FPMathOp;
716 FMFs = Op->getFastMathFlags();
717 } else {
718 OpType = OperationType::Other;
719 AllFlags = 0;
720 }
721 }
722
724 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
725
727 : OpType(OperationType::FCmp) {
728 FCmpFlags.Pred = Pred;
729 FCmpFlags.FMFs = FMFs;
730 }
731
733 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
734
736 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
737
738 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
739
741 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
742
744 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
745
747 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
748
750 OpType = Other.OpType;
751 AllFlags = Other.AllFlags;
752 }
753
754 /// Only keep flags also present in \p Other. \p Other must have the same
755 /// OpType as the current object.
756 void intersectFlags(const VPIRFlags &Other);
757
758 /// Drop all poison-generating flags.
760 // NOTE: This needs to be kept in-sync with
761 // Instruction::dropPoisonGeneratingFlags.
762 switch (OpType) {
763 case OperationType::OverflowingBinOp:
764 WrapFlags.HasNUW = false;
765 WrapFlags.HasNSW = false;
766 break;
767 case OperationType::Trunc:
768 TruncFlags.HasNUW = false;
769 TruncFlags.HasNSW = false;
770 break;
771 case OperationType::DisjointOp:
772 DisjointFlags.IsDisjoint = false;
773 break;
774 case OperationType::PossiblyExactOp:
775 ExactFlags.IsExact = false;
776 break;
777 case OperationType::GEPOp:
779 break;
780 case OperationType::FPMathOp:
781 case OperationType::FCmp:
782 getFMFsRef().NoNaNs = false;
783 getFMFsRef().NoInfs = false;
784 break;
785 case OperationType::NonNegOp:
786 NonNegFlags.NonNeg = false;
787 break;
788 case OperationType::Cmp:
789 case OperationType::Other:
790 break;
791 }
792 }
793
794 /// Apply the IR flags to \p I.
795 void applyFlags(Instruction &I) const {
796 switch (OpType) {
797 case OperationType::OverflowingBinOp:
798 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
799 I.setHasNoSignedWrap(WrapFlags.HasNSW);
800 break;
801 case OperationType::Trunc:
802 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
803 I.setHasNoSignedWrap(TruncFlags.HasNSW);
804 break;
805 case OperationType::DisjointOp:
806 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
807 break;
808 case OperationType::PossiblyExactOp:
809 I.setIsExact(ExactFlags.IsExact);
810 break;
811 case OperationType::GEPOp:
812 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
813 break;
814 case OperationType::FPMathOp:
815 case OperationType::FCmp: {
816 const FastMathFlagsTy &F = getFMFsRef();
817 I.setHasAllowReassoc(F.AllowReassoc);
818 I.setHasNoNaNs(F.NoNaNs);
819 I.setHasNoInfs(F.NoInfs);
820 I.setHasNoSignedZeros(F.NoSignedZeros);
821 I.setHasAllowReciprocal(F.AllowReciprocal);
822 I.setHasAllowContract(F.AllowContract);
823 I.setHasApproxFunc(F.ApproxFunc);
824 break;
825 }
826 case OperationType::NonNegOp:
827 I.setNonNeg(NonNegFlags.NonNeg);
828 break;
829 case OperationType::Cmp:
830 case OperationType::Other:
831 break;
832 }
833 }
834
836 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
837 "recipe doesn't have a compare predicate");
838 return OpType == OperationType::FCmp ? FCmpFlags.Pred : CmpPredicate;
839 }
840
842 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
843 "recipe doesn't have a compare predicate");
844 if (OpType == OperationType::FCmp)
845 FCmpFlags.Pred = Pred;
846 else
847 CmpPredicate = Pred;
848 }
849
851
852 /// Returns true if the recipe has a comparison predicate.
853 bool hasPredicate() const {
854 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
855 }
856
857 /// Returns true if the recipe has fast-math flags.
858 bool hasFastMathFlags() const {
859 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp;
860 }
861
863
864 /// Returns true if the recipe has non-negative flag.
865 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
866
867 bool isNonNeg() const {
868 assert(OpType == OperationType::NonNegOp &&
869 "recipe doesn't have a NNEG flag");
870 return NonNegFlags.NonNeg;
871 }
872
873 bool hasNoUnsignedWrap() const {
874 switch (OpType) {
875 case OperationType::OverflowingBinOp:
876 return WrapFlags.HasNUW;
877 case OperationType::Trunc:
878 return TruncFlags.HasNUW;
879 default:
880 llvm_unreachable("recipe doesn't have a NUW flag");
881 }
882 }
883
884 bool hasNoSignedWrap() const {
885 switch (OpType) {
886 case OperationType::OverflowingBinOp:
887 return WrapFlags.HasNSW;
888 case OperationType::Trunc:
889 return TruncFlags.HasNSW;
890 default:
891 llvm_unreachable("recipe doesn't have a NSW flag");
892 }
893 }
894
895 bool isDisjoint() const {
896 assert(OpType == OperationType::DisjointOp &&
897 "recipe cannot have a disjoing flag");
898 return DisjointFlags.IsDisjoint;
899 }
900
901private:
902 /// Get a reference to the fast-math flags for FPMathOp or FCmp.
903 FastMathFlagsTy &getFMFsRef() {
904 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
905 }
906 const FastMathFlagsTy &getFMFsRef() const {
907 return OpType == OperationType::FCmp ? FCmpFlags.FMFs : FMFs;
908 }
909
910public:
911#if !defined(NDEBUG)
912 /// Returns true if the set flags are valid for \p Opcode.
913 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
914#endif
915
916#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
917 void printFlags(raw_ostream &O) const;
918#endif
919};
920
921/// A pure-virtual common base class for recipes defining a single VPValue and
922/// using IR flags.
924 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
925 const VPIRFlags &Flags,
927 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
928
929 static inline bool classof(const VPRecipeBase *R) {
930 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
931 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
932 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
933 R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
934 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
935 R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
936 R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
937 R->getVPDefID() == VPRecipeBase::VPReductionSC ||
938 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
939 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
940 R->getVPDefID() == VPRecipeBase::VPVectorEndPointerSC ||
941 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
942 }
943
944 static inline bool classof(const VPUser *U) {
945 auto *R = dyn_cast<VPRecipeBase>(U);
946 return R && classof(R);
947 }
948
949 static inline bool classof(const VPValue *V) {
950 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
951 return R && classof(R);
952 }
953
954 VPRecipeWithIRFlags *clone() override = 0;
955
956 static inline bool classof(const VPSingleDefRecipe *U) {
957 auto *R = dyn_cast<VPRecipeBase>(U);
958 return R && classof(R);
959 }
960
961 void execute(VPTransformState &State) override = 0;
962
963 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
965 VPCostContext &Ctx) const;
966};
967
968/// Helper to access the operand that contains the unroll part for this recipe
969/// after unrolling.
970template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
971protected:
972 /// Return the VPValue operand containing the unroll part or null if there is
973 /// no such operand.
974 VPValue *getUnrollPartOperand(const VPUser &U) const;
975
976 /// Return the unroll part.
977 unsigned getUnrollPart(const VPUser &U) const;
978};
979
980/// Helper to manage IR metadata for recipes. It filters out metadata that
981/// cannot be propagated.
984
985public:
986 VPIRMetadata() = default;
987
988 /// Adds metadata that can be preserved from the original instruction
989 /// \p I.
991
992 /// Copy constructor for cloning.
993 VPIRMetadata(const VPIRMetadata &Other) = default;
994
996
997 /// Add all metadata to \p I.
998 void applyMetadata(Instruction &I) const;
999
1000 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1001 /// already exists, it will be replaced. Otherwise, it will be added.
1002 void setMetadata(unsigned Kind, MDNode *Node) {
1003 auto It =
1004 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1005 return P.first == Kind;
1006 });
1007 if (It != Metadata.end())
1008 It->second = Node;
1009 else
1010 Metadata.emplace_back(Kind, Node);
1011 }
1012
1013 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1014 /// nodes that are common to both.
1015 void intersect(const VPIRMetadata &MD);
1016
1017 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1018 MDNode *getMetadata(unsigned Kind) const {
1019 auto It =
1020 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1021 return It != Metadata.end() ? It->second : nullptr;
1022 }
1023
1024#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1025 /// Print metadata with node IDs.
1026 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1027#endif
1028};
1029
1030/// This is a concrete Recipe that models a single VPlan-level instruction.
1031/// While as any Recipe it may generate a sequence of IR instructions when
1032/// executed, these instructions would always form a single-def expression as
1033/// the VPInstruction is also a single def-use vertex.
1035 public VPIRMetadata,
1036 public VPUnrollPartAccessor<1> {
1037 friend class VPlanSlp;
1038
1039public:
1040 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1041 enum {
1043 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1044 // values of a first-order recurrence.
1048 // Creates a mask where each lane is active (true) whilst the current
1049 // counter (first operand + index) is less than the second operand. i.e.
1050 // mask[i] = icmp ult (op0 + i), op1
1051 // The size of the mask returned is VF * Multiplier (UF, third op).
1055 // Increment the canonical IV separately for each unrolled part.
1057 // Abstract instruction that compares two values and branches. This is
1058 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1062 /// Given operands of (the same) struct type, creates a struct of fixed-
1063 /// width vectors each containing a struct field of all operands. The
1064 /// number of operands matches the element count of every vector.
1066 /// Creates a fixed-width vector containing all operands. The number of
1067 /// operands matches the vector element count.
1069 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1070 /// abstract VPInstruction whose single defined VPValue represents VF
1071 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1072 /// VPInstructions.
1074 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1075 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1079 // Extracts the last part of its operand. Removed during unrolling.
1081 // Extracts the last lane of its vector operand, per part.
1083 // Extracts the second-to-last lane from its operand or the second-to-last
1084 // part if it is scalar. In the latter case, the recipe will be removed
1085 // during unrolling.
1087 LogicalAnd, // Non-poison propagating logical And.
1088 // Add an offset in bytes (second operand) to a base pointer (first
1089 // operand). Only generates scalar values (either for the first lane only or
1090 // for all lanes, depending on its uses).
1092 // Add a vector offset in bytes (second operand) to a scalar base pointer
1093 // (first operand).
1095 // Returns a scalar boolean value, which is true if any lane of its
1096 // (boolean) vector operands is true. It produces the reduced value across
1097 // all unrolled iterations. Unrolling will add all copies of its original
1098 // operand as additional operands. AnyOf is poison-safe as all operands
1099 // will be frozen.
1101 // Calculates the first active lane index of the vector predicate operands.
1102 // It produces the lane index across all unrolled iterations. Unrolling will
1103 // add all copies of its original operand as additional operands.
1104 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1105 // result even with operands that are all zeroes.
1107 // Calculates the last active lane index of the vector predicate operands.
1108 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1109 // tail-folding to extract the correct live-out value from the last active
1110 // iteration. It produces the lane index across all unrolled iterations.
1111 // Unrolling will add all copies of its original operand as additional
1112 // operands.
1114 // Returns a reversed vector for the operand.
1116
1117 // The opcodes below are used for VPInstructionWithType.
1118 //
1119 /// Scale the first operand (vector step) by the second operand
1120 /// (scalar-step). Casts both operands to the result type if needed.
1122 /// Start vector for reductions with 3 operands: the original start value,
1123 /// the identity value for the reduction and an integer indicating the
1124 /// scaling factor.
1126 // Creates a step vector starting from 0 to VF with a step of 1.
1128 /// Extracts a single lane (first operand) from a set of vector operands.
1129 /// The lane specifies an index into a vector formed by combining all vector
1130 /// operands (all operands after the first one).
1132 /// Explicit user for the resume phi of the canonical induction in the main
1133 /// VPlan, used by the epilogue vector loop.
1135 /// Returns the value for vscale.
1138 };
1139
1140 /// Returns true if this VPInstruction generates scalar values for all lanes.
1141 /// Most VPInstructions generate a single value per part, either vector or
1142 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1143 /// values per all lanes, stemming from an original ingredient. This method
1144 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1145 /// underlying ingredient.
1146 bool doesGeneratePerAllLanes() const;
1147
1148private:
1149 typedef unsigned char OpcodeTy;
1150 OpcodeTy Opcode;
1151
1152 /// An optional name that can be used for the generated IR instruction.
1153 std::string Name;
1154
1155 /// Returns true if we can generate a scalar for the first lane only if
1156 /// needed.
1157 bool canGenerateScalarForFirstLane() const;
1158
1159 /// Utility methods serving execute(): generates a single vector instance of
1160 /// the modeled instruction. \returns the generated value. In some cases an
1161 /// existing value is returned rather than a generated one.
1162 Value *generate(VPTransformState &State);
1163
1164#if !defined(NDEBUG)
1165 /// Return the number of operands determined by the opcode of the
1166 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1167 /// directly by the opcode.
1168 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1169#endif
1170
1171public:
1172 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1173 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1174 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1175
1176 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1177
1178 VPInstruction *clone() override {
1179 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1180 getDebugLoc(), Name);
1181 if (getUnderlyingValue())
1182 New->setUnderlyingValue(getUnderlyingInstr());
1183 return New;
1184 }
1185
1186 unsigned getOpcode() const { return Opcode; }
1187
1188 /// Generate the instruction.
1189 /// TODO: We currently execute only per-part unless a specific instance is
1190 /// provided.
1191 void execute(VPTransformState &State) override;
1192
1193 /// Return the cost of this VPInstruction.
1194 InstructionCost computeCost(ElementCount VF,
1195 VPCostContext &Ctx) const override;
1196
1197#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1198 /// Print the VPInstruction to dbgs() (for debugging).
1199 LLVM_DUMP_METHOD void dump() const;
1200#endif
1201
1202 bool hasResult() const {
1203 // CallInst may or may not have a result, depending on the called function.
1204 // Conservatively return calls have results for now.
1205 switch (getOpcode()) {
1206 case Instruction::Ret:
1207 case Instruction::Br:
1208 case Instruction::Store:
1209 case Instruction::Switch:
1210 case Instruction::IndirectBr:
1211 case Instruction::Resume:
1212 case Instruction::CatchRet:
1213 case Instruction::Unreachable:
1214 case Instruction::Fence:
1215 case Instruction::AtomicRMW:
1218 return false;
1219 default:
1220 return true;
1221 }
1222 }
1223
1224 /// Returns true if the underlying opcode may read from or write to memory.
1225 bool opcodeMayReadOrWriteFromMemory() const;
1226
1227 /// Returns true if the recipe only uses the first lane of operand \p Op.
1228 bool usesFirstLaneOnly(const VPValue *Op) const override;
1229
1230 /// Returns true if the recipe only uses the first part of operand \p Op.
1231 bool usesFirstPartOnly(const VPValue *Op) const override;
1232
1233 /// Returns true if this VPInstruction produces a scalar value from a vector,
1234 /// e.g. by performing a reduction or extracting a lane.
1235 bool isVectorToScalar() const;
1236
1237 /// Returns true if this VPInstruction's operands are single scalars and the
1238 /// result is also a single scalar.
1239 bool isSingleScalar() const;
1240
1241 /// Returns the symbolic name assigned to the VPInstruction.
1242 StringRef getName() const { return Name; }
1243
1244 /// Set the symbolic name for the VPInstruction.
1245 void setName(StringRef NewName) { Name = NewName.str(); }
1246
1247protected:
1248#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1249 /// Print the VPInstruction to \p O.
1250 void printRecipe(raw_ostream &O, const Twine &Indent,
1251 VPSlotTracker &SlotTracker) const override;
1252#endif
1253};
1254
1255/// A specialization of VPInstruction augmenting it with a dedicated result
1256/// type, to be used when the opcode and operands of the VPInstruction don't
1257/// directly determine the result type. Note that there is no separate VPDef ID
1258/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1259/// distinguished purely by the opcode.
1261 /// Scalar result type produced by the recipe.
1262 Type *ResultTy;
1263
1264public:
1266 Type *ResultTy, const VPIRFlags &Flags = {},
1267 const VPIRMetadata &Metadata = {},
1269 const Twine &Name = "")
1270 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1271 ResultTy(ResultTy) {}
1272
1273 static inline bool classof(const VPRecipeBase *R) {
1274 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1275 // type information.
1276 if (R->isScalarCast())
1277 return true;
1278 auto *VPI = dyn_cast<VPInstruction>(R);
1279 if (!VPI)
1280 return false;
1281 switch (VPI->getOpcode()) {
1285 return true;
1286 default:
1287 return false;
1288 }
1289 }
1290
1291 static inline bool classof(const VPUser *R) {
1293 }
1294
1295 VPInstruction *clone() override {
1296 auto *New =
1298 *this, *this, getDebugLoc(), getName());
1299 New->setUnderlyingValue(getUnderlyingValue());
1300 return New;
1301 }
1302
1303 void execute(VPTransformState &State) override;
1304
1305 /// Return the cost of this VPInstruction.
1307 VPCostContext &Ctx) const override {
1308 // TODO: Compute accurate cost after retiring the legacy cost model.
1309 return 0;
1310 }
1311
1312 Type *getResultType() const { return ResultTy; }
1313
1314protected:
1315#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1316 /// Print the recipe.
1317 void printRecipe(raw_ostream &O, const Twine &Indent,
1318 VPSlotTracker &SlotTracker) const override;
1319#endif
1320};
1321
1322/// Helper type to provide functions to access incoming values and blocks for
1323/// phi-like recipes.
1325protected:
1326 /// Return a VPRecipeBase* to the current object.
1327 virtual const VPRecipeBase *getAsRecipe() const = 0;
1328
1329public:
1330 virtual ~VPPhiAccessors() = default;
1331
1332 /// Returns the incoming VPValue with index \p Idx.
1333 VPValue *getIncomingValue(unsigned Idx) const {
1334 return getAsRecipe()->getOperand(Idx);
1335 }
1336
1337 /// Returns the incoming block with index \p Idx.
1338 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1339
1340 /// Returns the number of incoming values, also number of incoming blocks.
1341 virtual unsigned getNumIncoming() const {
1342 return getAsRecipe()->getNumOperands();
1343 }
1344
1345 /// Returns an iterator range over the incoming values.
1347 return make_range(getAsRecipe()->op_begin(),
1348 getAsRecipe()->op_begin() + getNumIncoming());
1349 }
1350
1352 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1353
1354 /// Returns an iterator range over the incoming blocks.
1356 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1357 return getIncomingBlock(Idx);
1358 };
1359 return map_range(index_range(0, getNumIncoming()), GetBlock);
1360 }
1361
1362 /// Returns an iterator range over pairs of incoming values and corresponding
1363 /// incoming blocks.
1369
1370 /// Removes the incoming value for \p IncomingBlock, which must be a
1371 /// predecessor.
1372 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1373
1374#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1375 /// Print the recipe.
1377#endif
1378};
1379
1381 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1382 : VPInstruction(Instruction::PHI, Operands, {}, {}, DL, Name) {}
1383
1384 static inline bool classof(const VPUser *U) {
1385 auto *VPI = dyn_cast<VPInstruction>(U);
1386 return VPI && VPI->getOpcode() == Instruction::PHI;
1387 }
1388
1389 static inline bool classof(const VPValue *V) {
1390 auto *VPI = dyn_cast<VPInstruction>(V);
1391 return VPI && VPI->getOpcode() == Instruction::PHI;
1392 }
1393
1394 static inline bool classof(const VPSingleDefRecipe *SDR) {
1395 auto *VPI = dyn_cast<VPInstruction>(SDR);
1396 return VPI && VPI->getOpcode() == Instruction::PHI;
1397 }
1398
1399 VPPhi *clone() override {
1400 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1401 PhiR->setUnderlyingValue(getUnderlyingValue());
1402 return PhiR;
1403 }
1404
1405 void execute(VPTransformState &State) override;
1406
1407protected:
1408#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1409 /// Print the recipe.
1410 void printRecipe(raw_ostream &O, const Twine &Indent,
1411 VPSlotTracker &SlotTracker) const override;
1412#endif
1413
1414 const VPRecipeBase *getAsRecipe() const override { return this; }
1415};
1416
1417 /// A recipe to wrap an original IR instruction not to be modified during
1418 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1419 /// Except for PHIs, VPIRInstructions cannot have any operands.
1421 Instruction &I;
1422
1423protected:
1424 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1425 /// subclasses may need to be created, e.g. VPIRPhi.
1427 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1428
1429public:
1430 ~VPIRInstruction() override = default;
1431
1432 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1433 /// VPIRInstruction.
1435
1436 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1437
1439 auto *R = create(I);
1440 for (auto *Op : operands())
1441 R->addOperand(Op);
1442 return R;
1443 }
1444
1445 void execute(VPTransformState &State) override;
1446
1447 /// Return the cost of this VPIRInstruction.
1449 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1450
1451 Instruction &getInstruction() const { return I; }
1452
1453 bool usesScalars(const VPValue *Op) const override {
1455 "Op must be an operand of the recipe");
1456 return true;
1457 }
1458
1459 bool usesFirstPartOnly(const VPValue *Op) const override {
1461 "Op must be an operand of the recipe");
1462 return true;
1463 }
1464
1465 bool usesFirstLaneOnly(const VPValue *Op) const override {
1467 "Op must be an operand of the recipe");
1468 return true;
1469 }
1470
1471 /// Update the recipe's first operand to the last lane of the last part of the
1472 /// operand using \p Builder. Must only be used for VPIRInstructions with at
1473 /// least one operand wrapping a PHINode.
1475
1476protected:
1477#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1478 /// Print the recipe.
1479 void printRecipe(raw_ostream &O, const Twine &Indent,
1480 VPSlotTracker &SlotTracker) const override;
1481#endif
1482};
1483
1484 /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1485 /// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand
1486 /// is allowed, and it is used to add a new incoming value for the single
1487 /// predecessor VPBB.
1489 public VPPhiAccessors {
1491
1492 static inline bool classof(const VPRecipeBase *U) {
1493 auto *R = dyn_cast<VPIRInstruction>(U);
1494 return R && isa<PHINode>(R->getInstruction());
1495 }
1496
1498
1499 void execute(VPTransformState &State) override;
1500
1501protected:
1502#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1503 /// Print the recipe.
1504 void printRecipe(raw_ostream &O, const Twine &Indent,
1505 VPSlotTracker &SlotTracker) const override;
1506#endif
1507
1508 const VPRecipeBase *getAsRecipe() const override { return this; }
1509};
1510
1511/// VPWidenRecipe is a recipe for producing a widened instruction using the
1512/// opcode and operands of the recipe. This recipe covers most of the
1513/// traditional vectorization cases where each recipe transforms into a
1514/// vectorized version of itself.
1516 public VPIRMetadata {
1517 unsigned Opcode;
1518
1519public:
1521 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1522 DebugLoc DL = {})
1523 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1524 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1525 setUnderlyingValue(&I);
1526 }
1527
1528 ~VPWidenRecipe() override = default;
1529
1530 VPWidenRecipe *clone() override {
1531 return new VPWidenRecipe(*getUnderlyingInstr(), operands(), *this, *this,
1532 getDebugLoc());
1533 }
1534
1535 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1536
1537 /// Produce a widened instruction using the opcode and operands of the recipe,
1538 /// processing State.VF elements.
1539 void execute(VPTransformState &State) override;
1540
1541 /// Return the cost of this VPWidenRecipe.
1542 InstructionCost computeCost(ElementCount VF,
1543 VPCostContext &Ctx) const override;
1544
1545 unsigned getOpcode() const { return Opcode; }
1546
1547protected:
1548#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1549 /// Print the recipe.
1550 void printRecipe(raw_ostream &O, const Twine &Indent,
1551 VPSlotTracker &SlotTracker) const override;
1552#endif
1553};
1554
1555/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1557 /// Cast instruction opcode.
1558 Instruction::CastOps Opcode;
1559
1560 /// Result type for the cast.
1561 Type *ResultTy;
1562
1563public:
1565 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1566 const VPIRMetadata &Metadata = {},
1568 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1569 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1570 assert(flagsValidForOpcode(Opcode) &&
1571 "Set flags not supported for the provided opcode");
1573 }
1574
1575 ~VPWidenCastRecipe() override = default;
1576
1578 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1580 *this, *this, getDebugLoc());
1581 }
1582
1583 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1584
1585 /// Produce widened copies of the cast.
1586 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1587
1588 /// Return the cost of this VPWidenCastRecipe.
1590 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1591
1592 Instruction::CastOps getOpcode() const { return Opcode; }
1593
1594 /// Returns the result type of the cast.
1595 Type *getResultType() const { return ResultTy; }
1596
1597protected:
1598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1599 /// Print the recipe.
1600 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1601 VPSlotTracker &SlotTracker) const override;
1602#endif
1603};
1604
1605/// A recipe for widening vector intrinsics.
1607 /// ID of the vector intrinsic to widen.
1608 Intrinsic::ID VectorIntrinsicID;
1609
1610 /// Scalar return type of the intrinsic.
1611 Type *ResultTy;
1612
1613 /// True if the intrinsic may read from memory.
1614 bool MayReadFromMemory;
1615
1616 /// True if the intrinsic may write to memory.
1617 bool MayWriteToMemory;
1618
1619 /// True if the intrinsic may have side-effects.
1620 bool MayHaveSideEffects;
1621
1622public:
1624 ArrayRef<VPValue *> CallArguments, Type *Ty,
1625 const VPIRFlags &Flags = {},
1626 const VPIRMetadata &MD = {},
1628 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1629 DL),
1630 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1631 MayReadFromMemory(CI.mayReadFromMemory()),
1632 MayWriteToMemory(CI.mayWriteToMemory()),
1633 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1634 setUnderlyingValue(&CI);
1635 }
1636
1638 ArrayRef<VPValue *> CallArguments, Type *Ty,
1639 const VPIRFlags &Flags = {},
1640 const VPIRMetadata &Metadata = {},
1642 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, Flags,
1643 DL),
1644 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1645 ResultTy(Ty) {
1646 LLVMContext &Ctx = Ty->getContext();
1647 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1648 MemoryEffects ME = Attrs.getMemoryEffects();
1649 MayReadFromMemory = !ME.onlyWritesMemory();
1650 MayWriteToMemory = !ME.onlyReadsMemory();
1651 MayHaveSideEffects = MayWriteToMemory ||
1652 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1653 !Attrs.hasAttribute(Attribute::WillReturn);
1654 }
1655
1656 ~VPWidenIntrinsicRecipe() override = default;
1657
1659 if (Value *CI = getUnderlyingValue())
1660 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1661 operands(), ResultTy, *this, *this,
1662 getDebugLoc());
1663 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1664 *this, *this, getDebugLoc());
1665 }
1666
1667 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1668
1669 /// Produce a widened version of the vector intrinsic.
1670 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1671
1672 /// Return the cost of this vector intrinsic.
1674 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1675
1676 /// Return the ID of the intrinsic.
1677 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1678
1679 /// Return the scalar return type of the intrinsic.
1680 Type *getResultType() const { return ResultTy; }
1681
1682 /// Return the name of the intrinsic as a string.
1684
1685 /// Returns true if the intrinsic may read from memory.
1686 bool mayReadFromMemory() const { return MayReadFromMemory; }
1687
1688 /// Returns true if the intrinsic may write to memory.
1689 bool mayWriteToMemory() const { return MayWriteToMemory; }
1690
1691 /// Returns true if the intrinsic may have side-effects.
1692 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1693
1694 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1695
1696protected:
1697#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1698 /// Print the recipe.
1699 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1700 VPSlotTracker &SlotTracker) const override;
1701#endif
1702};
1703
1704/// A recipe for widening Call instructions using library calls.
1706 public VPIRMetadata {
1707 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1708 /// between a given VF and the chosen vectorized variant, so there will be a
1709 /// different VPlan for each VF with a valid variant.
1710 Function *Variant;
1711
1712public:
1714 ArrayRef<VPValue *> CallArguments,
1715 const VPIRFlags &Flags = {},
1716 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1717 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, Flags, DL),
1718 VPIRMetadata(Metadata), Variant(Variant) {
1719 setUnderlyingValue(UV);
1720 assert(
1721 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1722 "last operand must be the called function");
1723 }
1724
1725 ~VPWidenCallRecipe() override = default;
1726
1728 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1729 *this, *this, getDebugLoc());
1730 }
1731
1732 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1733
1734 /// Produce a widened version of the call instruction.
1735 void execute(VPTransformState &State) override;
1736
1737 /// Return the cost of this VPWidenCallRecipe.
1738 InstructionCost computeCost(ElementCount VF,
1739 VPCostContext &Ctx) const override;
1740
1744
1747
1748protected:
1749#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1750 /// Print the recipe.
1751 void printRecipe(raw_ostream &O, const Twine &Indent,
1752 VPSlotTracker &SlotTracker) const override;
1753#endif
1754};
1755
1756/// A recipe representing a sequence of load -> update -> store as part of
1757/// a histogram operation. This means there may be aliasing between vector
1758/// lanes, which is handled by the llvm.experimental.vector.histogram family
1759/// of intrinsics. The only update operations currently supported are
1760/// 'add' and 'sub' where the other term is loop-invariant.
1762 /// Opcode of the update operation, currently either add or sub.
1763 unsigned Opcode;
1764
1765public:
1766 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1768 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1769
1770 ~VPHistogramRecipe() override = default;
1771
1773 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1774 }
1775
1776 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1777
1778 /// Produce a vectorized histogram operation.
1779 void execute(VPTransformState &State) override;
1780
1781 /// Return the cost of this VPHistogramRecipe.
1783 VPCostContext &Ctx) const override;
1784
1785 unsigned getOpcode() const { return Opcode; }
1786
1787 /// Return the mask operand if one was provided, or a null pointer if all
1788 /// lanes should be executed unconditionally.
1789 VPValue *getMask() const {
1790 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1791 }
1792
1793protected:
1794#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1795 /// Print the recipe
1796 void printRecipe(raw_ostream &O, const Twine &Indent,
1797 VPSlotTracker &SlotTracker) const override;
1798#endif
1799};
1800
1801/// A recipe for widening select instructions. Supports both wide vector and
1802/// single-scalar conditions, matching the behavior of LLVM IR's select
1803/// instruction.
1805 public VPIRMetadata {
1807 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1808 DebugLoc DL = {})
1809 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, Flags, DL),
1810 VPIRMetadata(MD) {
1811 setUnderlyingValue(SI);
1812 }
1813
1814 ~VPWidenSelectRecipe() override = default;
1815
1818 operands(), *this, *this, getDebugLoc());
1819 }
1820
1821 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1822
1823 /// Produce a widened version of the select instruction.
1824 void execute(VPTransformState &State) override;
1825
1826 /// Return the cost of this VPWidenSelectRecipe.
1827 InstructionCost computeCost(ElementCount VF,
1828 VPCostContext &Ctx) const override;
1829
1830 unsigned getOpcode() const { return Instruction::Select; }
1831
1832 VPValue *getCond() const {
1833 return getOperand(0);
1834 }
1835
1836 /// Returns true if the recipe only uses the first lane of operand \p Op.
1837 bool usesFirstLaneOnly(const VPValue *Op) const override {
1839 "Op must be an operand of the recipe");
1840 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1841 }
1842
1843protected:
1844#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1845 /// Print the recipe.
1846 void printRecipe(raw_ostream &O, const Twine &Indent,
1847 VPSlotTracker &SlotTracker) const override;
1848#endif
1849};
1850
1851/// A recipe for handling GEP instructions.
1853 Type *SourceElementTy;
1854
1855 bool isPointerLoopInvariant() const {
1856 return getOperand(0)->isDefinedOutsideLoopRegions();
1857 }
1858
1859 bool isIndexLoopInvariant(unsigned I) const {
1860 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1861 }
1862
1863public:
1865 const VPIRFlags &Flags = {},
1867 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, Flags, DL),
1868 SourceElementTy(GEP->getSourceElementType()) {
1869 setUnderlyingValue(GEP);
1871 (void)Metadata;
1873 assert(Metadata.empty() && "unexpected metadata on GEP");
1874 }
1875
1876 ~VPWidenGEPRecipe() override = default;
1877
1880 operands(), *this, getDebugLoc());
1881 }
1882
1883 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1884
1885 /// This recipe generates a GEP instruction.
1886 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1887
1888 /// Generate the gep nodes.
1889 void execute(VPTransformState &State) override;
1890
1891 Type *getSourceElementType() const { return SourceElementTy; }
1892
1893 /// Return the cost of this VPWidenGEPRecipe.
1895 VPCostContext &Ctx) const override {
1896 // TODO: Compute accurate cost after retiring the legacy cost model.
1897 return 0;
1898 }
1899
1900 /// Returns true if the recipe only uses the first lane of operand \p Op.
1901 bool usesFirstLaneOnly(const VPValue *Op) const override;
1902
1903protected:
1904#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1905 /// Print the recipe.
1906 void printRecipe(raw_ostream &O, const Twine &Indent,
1907 VPSlotTracker &SlotTracker) const override;
1908#endif
1909};
1910
1911/// A recipe to compute a pointer to the last element of each part of a widened
1912/// memory access for widened memory accesses of IndexedTy. Used for
1913/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1915 public VPUnrollPartAccessor<2> {
1916 Type *IndexedTy;
1917
1918 /// The constant stride of the pointer computed by this recipe, expressed in
1919 /// units of IndexedTy.
1920 int64_t Stride;
1921
1922public:
1924 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1925 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1926 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1927 IndexedTy(IndexedTy), Stride(Stride) {
1928 assert(Stride < 0 && "Stride must be negative");
1929 }
1930
1931 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1932
1934 const VPValue *getVFValue() const { return getOperand(1); }
1935
1936 void execute(VPTransformState &State) override;
1937
1938 bool usesFirstLaneOnly(const VPValue *Op) const override {
1940 "Op must be an operand of the recipe");
1941 return true;
1942 }
1943
1944 /// Return the cost of this VPVectorEndPointerRecipe.
1946 VPCostContext &Ctx) const override {
1947 // TODO: Compute accurate cost after retiring the legacy cost model.
1948 return 0;
1949 }
1950
1951 /// Returns true if the recipe only uses the first part of operand \p Op.
1952 bool usesFirstPartOnly(const VPValue *Op) const override {
1954 "Op must be an operand of the recipe");
1955 assert(getNumOperands() <= 2 && "must have at most two operands");
1956 return true;
1957 }
1958
1960 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1961 Stride, getGEPNoWrapFlags(),
1962 getDebugLoc());
1963 }
1964
1965protected:
1966#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1967 /// Print the recipe.
1968 void printRecipe(raw_ostream &O, const Twine &Indent,
1969 VPSlotTracker &SlotTracker) const override;
1970#endif
1971};
1972
1973/// A recipe to compute the pointers for widened memory accesses of \p
1974/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
1975/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
1977 Type *SourceElementTy;
1978
1979public:
1980 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
1982 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, Ptr, GEPFlags, DL),
1983 SourceElementTy(SourceElementTy) {}
1984
1985 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1986
1988 return getNumOperands() == 2 ? getOperand(1) : nullptr;
1989 }
1990
1991 void execute(VPTransformState &State) override;
1992
1993 Type *getSourceElementType() const { return SourceElementTy; }
1994
1995 bool usesFirstLaneOnly(const VPValue *Op) const override {
1997 "Op must be an operand of the recipe");
1998 return true;
1999 }
2000
2001 /// Returns true if the recipe only uses the first part of operand \p Op.
2002 bool usesFirstPartOnly(const VPValue *Op) const override {
2004 "Op must be an operand of the recipe");
2005 assert(getNumOperands() <= 2 && "must have at most two operands");
2006 return true;
2007 }
2008
2010 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2012 if (auto *Off = getOffset())
2013 Clone->addOperand(Off);
2014 return Clone;
2015 }
2016
2017 /// Return the cost of this VPVectorPointerRecipe.
2019 VPCostContext &Ctx) const override {
2020 // TODO: Compute accurate cost after retiring the legacy cost model.
2021 return 0;
2022 }
2023
2024protected:
2025#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2026 /// Print the recipe.
2027 void printRecipe(raw_ostream &O, const Twine &Indent,
2028 VPSlotTracker &SlotTracker) const override;
2029#endif
2030};
2031
2032/// A pure virtual base class for all recipes modeling header phis, including
2033/// phis for first order recurrences, pointer inductions and reductions. The
2034/// start value is the first operand of the recipe and the incoming value from
2035/// the backedge is the second operand.
2036///
2037/// Inductions are modeled using the following sub-classes:
2038/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2039/// starting at a specified value (zero for the main vector loop, the resume
2040/// value for the epilogue vector loop) and stepping by 1. The induction
2041/// controls exiting of the vector loop by comparing against the vector trip
2042/// count. Produces a single scalar PHI for the induction value per
2043/// iteration.
2044/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2045/// floating point inductions with arbitrary start and step values. Produces
2046/// a vector PHI per-part.
2047/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2048/// value of an IV with different start and step values. Produces a single
2049/// scalar value per iteration
2050/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2051/// canonical or derived induction.
2052/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2053/// pointer induction. Produces either a vector PHI per-part or scalar values
2054/// per-lane based on the canonical induction.
2056 public VPPhiAccessors {
2057protected:
2058 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2059 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2060 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
2061 UnderlyingInstr, DL) {}
2062
2063 const VPRecipeBase *getAsRecipe() const override { return this; }
2064
2065public:
2066 ~VPHeaderPHIRecipe() override = default;
2067
2068 /// Method to support type inquiry through isa, cast, and dyn_cast.
2069 static inline bool classof(const VPRecipeBase *R) {
2070 return R->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2071 R->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2072 }
2073 static inline bool classof(const VPValue *V) {
2074 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2075 }
/// Type inquiry for a VPSingleDefRecipe: \p R is upcast to VPRecipeBase and
/// the isa<> check is performed on that view of the recipe.
2076 static inline bool classof(const VPSingleDefRecipe *R) {
2077 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2078 }
2079
2080 /// Generate the phi nodes.
2081 void execute(VPTransformState &State) override = 0;
2082
2083 /// Return the cost of this header phi recipe.
2085 VPCostContext &Ctx) const override;
2086
2087 /// Returns the start value of the phi, if one is set.
2089 return getNumOperands() == 0 ? nullptr : getOperand(0);
2090 }
2092 return getNumOperands() == 0 ? nullptr : getOperand(0);
2093 }
2094
2095 /// Update the start value of the recipe.
2097
2098 /// Returns the incoming value from the loop backedge.
2100 return getOperand(1);
2101 }
2102
2103 /// Update the incoming value from the loop backedge.
2105
2106 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2107 /// to be a recipe.
2109 return *getBackedgeValue()->getDefiningRecipe();
2110 }
2111
2112protected:
2113#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2114 /// Print the recipe.
2115 void printRecipe(raw_ostream &O, const Twine &Indent,
2116 VPSlotTracker &SlotTracker) const override = 0;
2117#endif
2118};
2119
2120/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2121/// VPWidenPointerInductionRecipe), providing shared functionality, including
2122/// retrieving the step value, induction descriptor and original phi node.
2124 const InductionDescriptor &IndDesc;
2125
2126public:
2127 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2128 VPValue *Step, const InductionDescriptor &IndDesc,
2129 DebugLoc DL)
2130 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2131 addOperand(Step);
2132 }
2133
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// recipes whose ID is VPWidenIntOrFpInductionSC or
/// VPWidenPointerInductionSC.
2134 static inline bool classof(const VPRecipeBase *R) {
2135 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2136 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2137 }
2138
/// Type inquiry for a VPValue: true if \p V has a defining recipe and that
/// recipe is a widened induction. A value whose defining recipe is null
/// yields false.
2139 static inline bool classof(const VPValue *V) {
2140 auto *R = V->getDefiningRecipe();
2141 return R && classof(R);
2142 }
2143
2144 static inline bool classof(const VPSingleDefRecipe *R) {
2145 return classof(static_cast<const VPRecipeBase *>(R));
2146 }
2147
2148 void execute(VPTransformState &State) override = 0;
2149
2150 /// Returns the step value of the induction.
2152 const VPValue *getStepValue() const { return getOperand(1); }
2153
2154 /// Update the step value of the recipe.
2155 void setStepValue(VPValue *V) { setOperand(1, V); }
2156
/// Returns operand 2 of the recipe. The constructor of this base class only
/// adds the start and step operands; operand 2 is appended by the derived
/// recipes (VF for VPWidenIntOrFpInductionRecipe, NumUnrolledElems for
/// VPWidenPointerInductionRecipe).
2158 const VPValue *getVFValue() const { return getOperand(2); }
2159
2160 /// Returns the number of incoming values, also number of incoming blocks.
2161 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2162 /// incoming value, its start value.
2163 unsigned getNumIncoming() const override { return 1; }
2164
2166
2167 /// Returns the induction descriptor for the recipe.
2168 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2169
2171 // TODO: All operands of base recipe must exist and be at same index in
2172 // derived recipe.
2174 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2175 }
2176
2178 // TODO: All operands of base recipe must exist and be at same index in
2179 // derived recipe.
2181 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2182 }
2183
2184 /// Returns true if the recipe only uses the first lane of operand \p Op.
2185 bool usesFirstLaneOnly(const VPValue *Op) const override {
2187 "Op must be an operand of the recipe");
2188 // The recipe creates its own wide start value, so it only requests the
2189 // first lane of the operand.
2190 // TODO: Remove once creating the start value is modeled separately.
2191 return Op == getStartValue() || Op == getStepValue();
2192 }
2193};
2194
2195/// A recipe for handling phi nodes of integer and floating-point inductions,
2196/// producing their vector values. This is an abstract recipe and must be
2197/// converted to concrete recipes before executing.
2199 public VPIRFlags {
2200 TruncInst *Trunc;
2201
2202 // If this recipe is unrolled it will have 2 additional operands.
2203 bool isUnrolled() const { return getNumOperands() == 5; }
2204
2205public:
2207 VPValue *VF, const InductionDescriptor &IndDesc,
2208 const VPIRFlags &Flags, DebugLoc DL)
2209 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2210 Step, IndDesc, DL),
2211 VPIRFlags(Flags), Trunc(nullptr) {
2212 addOperand(VF);
2213 }
2214
2216 VPValue *VF, const InductionDescriptor &IndDesc,
2217 TruncInst *Trunc, const VPIRFlags &Flags,
2218 DebugLoc DL)
2219 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2220 Step, IndDesc, DL),
2221 VPIRFlags(Flags), Trunc(Trunc) {
2222 addOperand(VF);
2224 (void)Metadata;
2225 if (Trunc)
2227 assert(Metadata.empty() && "unexpected metadata on Trunc");
2228 }
2229
2231
2237
2238 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2239
/// Must not be called: this recipe is abstract and is expected to have been
/// replaced through expandVPWidenIntOrFpInductionRecipe before execution.
/// Reaching this method hits llvm_unreachable.
2240 void execute(VPTransformState &State) override {
2241 llvm_unreachable("cannot execute this recipe, should be expanded via "
2242 "expandVPWidenIntOrFpInductionRecipe");
2243 }
2244
2246 // If the recipe has been unrolled return the VPValue for the induction
2247 // increment.
2248 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2249 }
2250
2251 /// Returns the number of incoming values, also number of incoming blocks.
2252 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2253 /// incoming value, its start value.
2254 unsigned getNumIncoming() const override { return 1; }
2255
2256 /// Returns the TruncInst associated with this recipe, or nullptr if it has
2257 /// none.
2258 TruncInst *getTruncInst() { return Trunc; }
2259 const TruncInst *getTruncInst() const { return Trunc; }
2260
2261 /// Returns true if the induction is canonical, i.e. starting at 0 and
2262 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2263 /// same type as the canonical induction.
2264 bool isCanonical() const;
2265
2266 /// Returns the scalar type of the induction.
2268 return Trunc ? Trunc->getType()
2270 }
2271
2272 /// Returns the VPValue representing the value of this induction at
2273 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2274 /// take place.
2276 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2277 }
2278
2279protected:
2280#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2281 /// Print the recipe.
2282 void printRecipe(raw_ostream &O, const Twine &Indent,
2283 VPSlotTracker &SlotTracker) const override;
2284#endif
2285};
2286
2288public:
2289 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2290 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2291 /// VF*UF.
2293 VPValue *NumUnrolledElems,
2294 const InductionDescriptor &IndDesc, DebugLoc DL)
2295 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2296 Step, IndDesc, DL) {
2297 addOperand(NumUnrolledElems);
2298 }
2299
2301
2307
2308 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2309
2310 /// Generate vector values for the pointer induction.
2311 void execute(VPTransformState &State) override {
2312 llvm_unreachable("cannot execute this recipe, should be expanded via "
2313 "expandVPWidenPointerInduction");
2314 };
2315
2316 /// Returns true if only scalar values will be generated.
2317 bool onlyScalarsGenerated(bool IsScalable);
2318
2319protected:
2320#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2321 /// Print the recipe.
2322 void printRecipe(raw_ostream &O, const Twine &Indent,
2323 VPSlotTracker &SlotTracker) const override;
2324#endif
2325};
2326
2327/// A recipe for widened phis. Incoming values are operands of the recipe and
2328/// their operand index corresponds to the incoming predecessor block. If the
2329/// recipe is placed in an entry block to a (non-replicate) region, it must have
2330/// exactly 2 incoming values, the first from the predecessor of the region and
2331/// the second from the exiting block of the region.
2333 public VPPhiAccessors {
2334 /// Name to use for the generated IR instruction for the widened phi.
2335 std::string Name;
2336
2337public:
2338 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2339 /// debug location \p DL.
2340 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2341 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2342 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2343 if (Start)
2344 addOperand(Start);
2345 }
2346
2349 getOperand(0), getDebugLoc(), Name);
2351 C->addOperand(Op);
2352 return C;
2353 }
2354
2355 ~VPWidenPHIRecipe() override = default;
2356
2357 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2358
2359 /// Generate the phi/select nodes.
2360 void execute(VPTransformState &State) override;
2361
2362protected:
2363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2364 /// Print the recipe.
2365 void printRecipe(raw_ostream &O, const Twine &Indent,
2366 VPSlotTracker &SlotTracker) const override;
2367#endif
2368
2369 const VPRecipeBase *getAsRecipe() const override { return this; }
2370};
2371
2372/// A recipe for handling first-order recurrence phis. The start value is the
2373/// first operand of the recipe and the incoming value from the backedge is the
2374/// second operand.
2377 VPValue &BackedgeValue)
2378 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {
2379 addOperand(&BackedgeValue);
2380 }
2381
2382 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2383
2388
2389 void execute(VPTransformState &State) override;
2390
2391 /// Return the cost of this first-order recurrence phi recipe.
2393 VPCostContext &Ctx) const override;
2394
2395 /// Returns true if the recipe only uses the first lane of operand \p Op.
2396 bool usesFirstLaneOnly(const VPValue *Op) const override {
2398 "Op must be an operand of the recipe");
2399 return Op == getStartValue();
2400 }
2401
2402protected:
2403#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2404 /// Print the recipe.
2405 void printRecipe(raw_ostream &O, const Twine &Indent,
2406 VPSlotTracker &SlotTracker) const override;
2407#endif
2408};
2409
2410/// Possible variants of a reduction.
2411
2412/// This reduction is ordered and in-loop.
2413struct RdxOrdered {};
2414/// This reduction is in-loop.
2415struct RdxInLoop {};
2416/// This reduction is unordered with the partial result scaled down by some
2417/// factor.
2420};
2421using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2422
2423inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2424 unsigned ScaleFactor) {
2425 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2426 if (Ordered)
2427 return RdxOrdered{};
2428 if (InLoop)
2429 return RdxInLoop{};
2430 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2431}
2432
2433/// A recipe for handling reduction phis. The start value is the first operand
2434/// of the recipe and the incoming value from the backedge is the second
2435/// operand.
2437 public VPUnrollPartAccessor<2> {
2438 /// The recurrence kind of the reduction.
2439 const RecurKind Kind;
2440
2441 ReductionStyle Style;
2442
2443 /// The phi is part of a multi-use reduction (e.g., used in FindLastIV
2444 /// patterns for argmin/argmax).
2445 /// TODO: Also support cases where the phi itself has a single use, but its
2446 /// compare has multiple uses.
2447 bool HasUsesOutsideReductionChain;
2448
2449public:
2450 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2452 VPValue &BackedgeValue, ReductionStyle Style,
2453 bool HasUsesOutsideReductionChain = false)
2454 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2455 Style(Style),
2456 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2457 addOperand(&BackedgeValue);
2458 }
2459
2460 ~VPReductionPHIRecipe() override = default;
2461
2463 return new VPReductionPHIRecipe(
2465 *getOperand(0), *getBackedgeValue(), Style,
2466 HasUsesOutsideReductionChain);
2467 }
2468
2469 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2470
2471 /// Generate the phi/select nodes.
2472 void execute(VPTransformState &State) override;
2473
2474 /// Get the factor that the VF of this recipe's output should be scaled by, or
2475 /// 1 if it isn't scaled.
2476 unsigned getVFScaleFactor() const {
2477 auto *Partial = std::get_if<RdxUnordered>(&Style);
2478 return Partial ? Partial->VFScaleFactor : 1;
2479 }
2480
2481 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2482 /// > 1.
/// Note that this replaces the current reduction style with RdxUnordered
/// carrying \p ScaleFactor, whatever the style was before.
2483 void setVFScaleFactor(unsigned ScaleFactor) {
2484 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2485 Style = RdxUnordered{ScaleFactor};
2486 }
2487
2488 /// Returns the number of incoming values, also number of incoming blocks.
2489 /// A reduction phi has two incoming values: its start value and the value
2490 /// coming from the loop backedge.
2491 unsigned getNumIncoming() const override { return 2; }
2492
2493 /// Returns the recurrence kind of the reduction.
2494 RecurKind getRecurrenceKind() const { return Kind; }
2495
2496 /// Returns true, if the phi is part of an ordered reduction.
2497 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2498
2499 /// Returns true if the phi is part of an in-loop reduction.
2500 bool isInLoop() const {
2501 return std::holds_alternative<RdxInLoop>(Style) ||
2502 std::holds_alternative<RdxOrdered>(Style);
2503 }
2504
2505 /// Returns true if the reduction outputs a vector with a scaled down VF.
2506 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2507
2508 /// Returns true, if the phi is part of a multi-use reduction.
2510 return HasUsesOutsideReductionChain;
2511 }
2512
2513 /// Returns true if the recipe only uses the first lane of operand \p Op.
2514 bool usesFirstLaneOnly(const VPValue *Op) const override {
2516 "Op must be an operand of the recipe");
2517 return isOrdered() || isInLoop();
2518 }
2519
2520protected:
2521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2522 /// Print the recipe.
2523 void printRecipe(raw_ostream &O, const Twine &Indent,
2524 VPSlotTracker &SlotTracker) const override;
2525#endif
2526};
2527
2528/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2529/// instructions.
2531public:
2532 /// The blend operation is a User of the incoming values and of their
2533 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2534 /// be omitted (implied by passing an odd number of operands) in which case
2535 /// all other incoming values are merged into it.
2537 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2538 assert(Operands.size() >= 2 && "Expected at least two operands!");
2539 }
2540
2545
2546 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2547
2548 /// A normalized blend is one that has an odd number of operands, whereby the
2549 /// first operand does not have an associated mask.
2550 bool isNormalized() const { return getNumOperands() % 2; }
2551
2552 /// Return the number of incoming values, taking into account when normalized
2553 /// the first incoming value will have no mask.
2554 unsigned getNumIncomingValues() const {
2555 return (getNumOperands() + isNormalized()) / 2;
2556 }
2557
2558 /// Return incoming value number \p Idx.
/// Operands are laid out as [I0, M0, I1, M1, ...]. In a normalized blend M0 is
/// absent, so every incoming value after I0 sits one operand slot earlier.
2559 VPValue *getIncomingValue(unsigned Idx) const {
2560 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2561 }
2562
2563 /// Return mask number \p Idx.
/// A normalized blend has no mask for the first incoming value, so \p Idx must
/// be > 0 in that case (asserted). With normalized operands [I0, I1, M1, ...]
/// mask Idx is operand 2 * Idx; with [I0, M0, I1, M1, ...] it is operand
/// 2 * Idx + 1.
2564 VPValue *getMask(unsigned Idx) const {
2565 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2566 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2567 }
2568
2569 /// Set mask number \p Idx to \p V.
2570 void setMask(unsigned Idx, VPValue *V) {
2571 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2572 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2573 }
2574
/// Must not be called: a VPBlendRecipe is expected to have been expanded (by
/// simplifyBlends, per the message below) before execution.
2575 void execute(VPTransformState &State) override {
2576 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2577 }
2578
2579 /// Return the cost of this VPBlendRecipe.
2580 InstructionCost computeCost(ElementCount VF,
2581 VPCostContext &Ctx) const override;
2582
2583 /// Returns true if the recipe only uses the first lane of operand \p Op.
2584 bool usesFirstLaneOnly(const VPValue *Op) const override {
2586 "Op must be an operand of the recipe");
2587 // Recursing through Blend recipes only; must terminate at header phis at the
2588 // latest.
2589 return all_of(users(),
2590 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2591 }
2592
2593protected:
2594#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2595 /// Print the recipe.
2596 void printRecipe(raw_ostream &O, const Twine &Indent,
2597 VPSlotTracker &SlotTracker) const override;
2598#endif
2599};
2600
2601/// A common base class for interleaved memory operations.
2602/// An Interleaved memory operation is a memory access method that combines
2603/// multiple strided loads/stores into a single wide load/store with shuffles.
2604/// The first operand is the start address. The optional operands are, in order,
2605/// the stored values and the mask.
2607 public VPIRMetadata {
2609
2610 /// Indicates if the interleave group is in a conditional block and requires a
2611 /// mask.
2612 bool HasMask = false;
2613
2614 /// Indicates if gaps between members of the group need to be masked out or if
2615 /// unused gaps can be loaded speculatively.
2616 bool NeedsMaskForGaps = false;
2617
2618protected:
2619 VPInterleaveBase(const unsigned char SC,
2621 ArrayRef<VPValue *> Operands,
2622 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2623 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2624 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2625 NeedsMaskForGaps(NeedsMaskForGaps) {
2626 // TODO: extend the masked interleaved-group support to reversed access.
2627 assert((!Mask || !IG->isReverse()) &&
2628 "Reversed masked interleave-group not supported.");
2629 for (unsigned I = 0; I < IG->getFactor(); ++I)
2630 if (Instruction *Inst = IG->getMember(I)) {
2631 if (Inst->getType()->isVoidTy())
2632 continue;
2633 new VPValue(Inst, this);
2634 }
2635
2636 for (auto *SV : StoredValues)
2637 addOperand(SV);
2638 if (Mask) {
2639 HasMask = true;
2640 addOperand(Mask);
2641 }
2642 }
2643
2644public:
2645 VPInterleaveBase *clone() override = 0;
2646
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// recipes whose ID is VPInterleaveSC or VPInterleaveEVLSC.
2647 static inline bool classof(const VPRecipeBase *R) {
2648 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2649 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2650 }
2651
/// Type inquiry for a VPUser: true if \p U is a VPRecipeBase (checked via
/// dyn_cast) and that recipe is an interleave recipe; any other user yields
/// false.
2652 static inline bool classof(const VPUser *U) {
2653 auto *R = dyn_cast<VPRecipeBase>(U);
2654 return R && classof(R);
2655 }
2656
2657 /// Return the address accessed by this recipe.
2658 VPValue *getAddr() const {
2659 return getOperand(0); // Address is the 1st, mandatory operand.
2660 }
2661
2662 /// Return the mask used by this recipe. Note that a full mask is represented
2663 /// by a nullptr.
2664 VPValue *getMask() const {
2665 // Mask is optional and the last operand.
2666 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2667 }
2668
2669 /// Return true if the access needs a mask because of the gaps.
2670 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2671
2673
/// Returns the insert position of the interleave group, as reported by
/// IG->getInsertPos().
2674 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2675
/// Must not be called on the base class; reaching this method hits
/// llvm_unreachable.
2676 void execute(VPTransformState &State) override {
2677 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2678 }
2679
2680 /// Return the cost of this recipe.
2681 InstructionCost computeCost(ElementCount VF,
2682 VPCostContext &Ctx) const override;
2683
2684 /// Returns true if the recipe only uses the first lane of operand \p Op.
2685 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2686
2687 /// Returns the number of stored operands of this interleave group. Returns 0
2688 /// for load interleave groups.
2689 virtual unsigned getNumStoreOperands() const = 0;
2690
2691 /// Return the VPValues stored by this interleave group. If it is a load
2692 /// interleave group, return an empty ArrayRef.
2694 return ArrayRef<VPValue *>(op_end() -
2695 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2697 }
2698};
2699
2700/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2701/// or stores into one wide load/store and shuffles. The first operand of a
2702/// VPInterleave recipe is the address, followed by the stored values, followed
2703/// by an optional mask.
2705public:
2707 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2708 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2709 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2710 NeedsMaskForGaps, MD, DL) {}
2711
2712 ~VPInterleaveRecipe() override = default;
2713
2717 needsMaskForGaps(), *this, getDebugLoc());
2718 }
2719
2720 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2721
2722 /// Generate the wide load or store, and shuffles.
2723 void execute(VPTransformState &State) override;
2724
2725 bool usesFirstLaneOnly(const VPValue *Op) const override {
2727 "Op must be an operand of the recipe");
2728 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2729 }
2730
/// Returns the number of stored-value operands: all operands except the
/// address and, when a mask is present, the trailing mask operand.
2731 unsigned getNumStoreOperands() const override {
2732 return getNumOperands() - (getMask() ? 2 : 1);
2733 }
2734
2735protected:
2736#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2737 /// Print the recipe.
2738 void printRecipe(raw_ostream &O, const Twine &Indent,
2739 VPSlotTracker &SlotTracker) const override;
2740#endif
2741};
2742
2743/// A recipe for interleaved memory operations with vector-predication
2744/// intrinsics. The first operand is the address, the second operand is the
2745/// explicit vector length. Stored values and mask are optional operands.
2747public:
2749 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2750 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2751 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2752 R.getDebugLoc()) {
2753 assert(!getInterleaveGroup()->isReverse() &&
2754 "Reversed interleave-group with tail folding is not supported.");
2755 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2756 "supported for scalable vector.");
2757 }
2758
2759 ~VPInterleaveEVLRecipe() override = default;
2760
2762 llvm_unreachable("cloning not implemented yet");
2763 }
2764
2765 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2766
2767 /// The VPValue of the explicit vector length.
2768 VPValue *getEVL() const { return getOperand(1); }
2769
2770 /// Generate the wide load or store, and shuffles.
2771 void execute(VPTransformState &State) override;
2772
2773 /// The recipe only uses the first lane of the address, and EVL operand.
2774 bool usesFirstLaneOnly(const VPValue *Op) const override {
2776 "Op must be an operand of the recipe");
2777 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2778 Op == getEVL();
2779 }
2780
/// Returns the number of stored-value operands: all operands except the
/// address, the EVL and, when a mask is present, the trailing mask operand.
2781 unsigned getNumStoreOperands() const override {
2782 return getNumOperands() - (getMask() ? 3 : 2);
2783 }
2784
2785protected:
2786#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2787 /// Print the recipe.
2788 void printRecipe(raw_ostream &O, const Twine &Indent,
2789 VPSlotTracker &SlotTracker) const override;
2790#endif
2791};
2792
2793/// A recipe to represent inloop, ordered or partial reduction operations. It
2794/// performs a reduction on a vector operand into a scalar (vector in the case
2795/// of a partial reduction) value, and adds the result to a chain. The Operands
2796/// are {ChainOp, VecOp, [Condition]}.
2798
2799 /// The recurrence kind for the reduction in question.
2800 RecurKind RdxKind;
2801 /// Whether the reduction is conditional.
2802 bool IsConditional = false;
2803 ReductionStyle Style;
2804
2805protected:
2806 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2808 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2809 ReductionStyle Style, DebugLoc DL)
2810 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2811 Style(Style) {
2812 if (CondOp) {
2813 IsConditional = true;
2814 addOperand(CondOp);
2815 }
2817 }
2818
2819public:
2821 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2823 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2824 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2825 DL) {}
2826
2828 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2830 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2831 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp, Style,
2832 DL) {}
2833
2834 ~VPReductionRecipe() override = default;
2835
2837 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2839 getCondOp(), Style, getDebugLoc());
2840 }
2841
2842 static inline bool classof(const VPRecipeBase *R) {
2843 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2844 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2845 }
2846
2847 static inline bool classof(const VPUser *U) {
2848 auto *R = dyn_cast<VPRecipeBase>(U);
2849 return R && classof(R);
2850 }
2851
2852 static inline bool classof(const VPValue *VPV) {
2853 const VPRecipeBase *R = VPV->getDefiningRecipe();
2854 return R && classof(R);
2855 }
2856
2857 static inline bool classof(const VPSingleDefRecipe *R) {
2858 return classof(static_cast<const VPRecipeBase *>(R));
2859 }
2860
2861 /// Generate the reduction in the loop.
2862 void execute(VPTransformState &State) override;
2863
2864 /// Return the cost of VPReductionRecipe.
2865 InstructionCost computeCost(ElementCount VF,
2866 VPCostContext &Ctx) const override;
2867
2868 /// Return the recurrence kind for the in-loop reduction.
2869 RecurKind getRecurrenceKind() const { return RdxKind; }
2870 /// Return true if the in-loop reduction is ordered.
2871 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
2872 /// Return true if the in-loop reduction is conditional.
2873 bool isConditional() const { return IsConditional; };
2874 /// Returns true if the reduction outputs a vector with a scaled down VF.
2875 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2876 /// Returns true if the reduction is in-loop.
2877 bool isInLoop() const {
2878 return std::holds_alternative<RdxInLoop>(Style) ||
2879 std::holds_alternative<RdxOrdered>(Style);
2880 }
2881 /// The VPValue of the scalar Chain being accumulated.
2882 VPValue *getChainOp() const { return getOperand(0); }
2883 /// The VPValue of the vector value to be reduced.
2884 VPValue *getVecOp() const { return getOperand(1); }
2885 /// The VPValue of the condition for the block.
2887 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2888 }
2889 /// Get the factor that the VF of this recipe's output should be scaled by, or
2890 /// 1 if it isn't scaled.
2891 unsigned getVFScaleFactor() const {
2892 auto *Partial = std::get_if<RdxUnordered>(&Style);
2893 return Partial ? Partial->VFScaleFactor : 1;
2894 }
2895
2896protected:
2897#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2898 /// Print the recipe.
2899 void printRecipe(raw_ostream &O, const Twine &Indent,
2900 VPSlotTracker &SlotTracker) const override;
2901#endif
2902};
2903
2904/// A recipe to represent inloop reduction operations with vector-predication
2905/// intrinsics, performing a reduction on a vector operand with the explicit
2906/// vector length (EVL) into a scalar value, and adding the result to a chain.
2907/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2909public:
2913 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2914 R.getFastMathFlags(),
2916 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2917 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1), DL) {}
2918
2919 ~VPReductionEVLRecipe() override = default;
2920
2922 llvm_unreachable("cloning not implemented yet");
2923 }
2924
2925 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2926
2927 /// Generate the reduction in the loop
2928 void execute(VPTransformState &State) override;
2929
2930 /// The VPValue of the explicit vector length.
2931 VPValue *getEVL() const { return getOperand(2); }
2932
2933 /// Returns true if the recipe only uses the first lane of operand \p Op.
2934 bool usesFirstLaneOnly(const VPValue *Op) const override {
2936 "Op must be an operand of the recipe");
2937 return Op == getEVL();
2938 }
2939
2940protected:
2941#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2942 /// Print the recipe.
2943 void printRecipe(raw_ostream &O, const Twine &Indent,
2944 VPSlotTracker &SlotTracker) const override;
2945#endif
2946};
2947
2948/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2949/// copies of the original scalar type, one per lane, instead of producing a
2950/// single copy of widened type for all lanes. If the instruction is known to be
2951/// a single scalar, only one copy, per lane zero, will be generated.
2953 public VPIRMetadata {
2954 /// Indicator if only a single replica per lane is needed.
2955 bool IsSingleScalar;
2956
2957 /// Indicator if the replicas are also predicated.
2958 bool IsPredicated;
2959
2960public:
2962 bool IsSingleScalar, VPValue *Mask = nullptr,
2963 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
2964 DebugLoc DL = DebugLoc::getUnknown())
2965 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, Flags, DL),
2966 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2967 IsPredicated(Mask) {
2968 setUnderlyingValue(I);
2969 if (Mask)
2970 addOperand(Mask);
2971 }
2972
2973 ~VPReplicateRecipe() override = default;
2974
2976 auto *Copy = new VPReplicateRecipe(
2977 getUnderlyingInstr(), operands(), IsSingleScalar,
2978 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
2979 Copy->transferFlags(*this);
2980 return Copy;
2981 }
2982
2983 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2984
2985 /// Generate replicas of the desired Ingredient. Replicas will be generated
2986 /// for all parts and lanes unless a specific part and lane are specified in
2987 /// the \p State.
2988 void execute(VPTransformState &State) override;
2989
2990 /// Return the cost of this VPReplicateRecipe.
2991 InstructionCost computeCost(ElementCount VF,
2992 VPCostContext &Ctx) const override;
2993
2994 bool isSingleScalar() const { return IsSingleScalar; }
2995
2996 bool isPredicated() const { return IsPredicated; }
2997
2998 /// Returns true if the recipe only uses the first lane of operand \p Op.
2999 bool usesFirstLaneOnly(const VPValue *Op) const override {
3001 "Op must be an operand of the recipe");
3002 return isSingleScalar();
3003 }
3004
3005 /// Returns true if the recipe uses scalars of operand \p Op.
3006 bool usesScalars(const VPValue *Op) const override {
3008 "Op must be an operand of the recipe");
3009 return true;
3010 }
3011
3012 /// Returns true if the recipe is used by a widened recipe via an intervening
3013 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3014 /// in a vector.
3015 bool shouldPack() const;
3016
3017 /// Return the mask of a predicated VPReplicateRecipe.
3019 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3020 return getOperand(getNumOperands() - 1);
3021 }
3022
3023 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3024
3025protected:
3026#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3027 /// Print the recipe.
3028 void printRecipe(raw_ostream &O, const Twine &Indent,
3029 VPSlotTracker &SlotTracker) const override;
3030#endif
3031};
3032
3033/// A recipe for generating conditional branches on the bits of a mask.
3035public:
3037 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3038
3041 }
3042
3043 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
3044
3045 /// Generate the extraction of the appropriate bit from the block mask and the
3046 /// conditional branch.
3047 void execute(VPTransformState &State) override;
3048
3049 /// Return the cost of this VPBranchOnMaskRecipe.
3050 InstructionCost computeCost(ElementCount VF,
3051 VPCostContext &Ctx) const override;
3052
3053#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3054 /// Print the recipe.
3055 void printRecipe(raw_ostream &O, const Twine &Indent,
3056 VPSlotTracker &SlotTracker) const override {
3057 O << Indent << "BRANCH-ON-MASK ";
3059 }
3060#endif
3061
3062 /// Returns true if the recipe uses scalars of operand \p Op.
3063 bool usesScalars(const VPValue *Op) const override {
3065 "Op must be an operand of the recipe");
3066 return true;
3067 }
3068};
3069
3070/// A recipe to combine multiple recipes into a single 'expression' recipe,
3071/// which should be considered a single entity for cost-modeling and transforms.
3072/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3073/// expression recipes, before execute. The individual expression recipes are
3074/// completely disconnected from the def-use graph of other recipes not part of
3075/// the expression. Def-use edges between pairs of expression recipes remain
3076/// intact, whereas every edge between an expression recipe and a recipe outside
3077/// the expression is elevated to connect the non-expression recipe with the
3078/// VPExpressionRecipe itself.
3079class VPExpressionRecipe : public VPSingleDefRecipe {
3080 /// Recipes included in this VPExpressionRecipe. This could contain
3081 /// duplicates.
3082 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3083
3084 /// Temporary VPValues used for external operands of the expression, i.e.
3085 /// operands not defined by recipes in the expression.
3086 SmallVector<VPValue *> LiveInPlaceholders;
3087
3088 enum class ExpressionTypes {
3089 /// Represents an inloop extended reduction operation, performing a
3090 /// reduction on an extended vector operand into a scalar value, and adding
3091 /// the result to a chain.
3092 ExtendedReduction,
3093 /// Represent an inloop multiply-accumulate reduction, multiplying the
3094 /// extended vector operands, performing a reduction.add on the result, and
3095 /// adding the scalar result to a chain.
3096 ExtMulAccReduction,
3097 /// Represent an inloop multiply-accumulate reduction, multiplying the
3098 /// vector operands, performing a reduction.add on the result, and adding
3099 /// the scalar result to a chain.
3100 MulAccReduction,
3101 /// Represent an inloop multiply-accumulate reduction, multiplying the
3102 /// extended vector operands, negating the multiplication, performing a
3103 /// reduction.add on the result, and adding the scalar result to a chain.
3104 ExtNegatedMulAccReduction,
3105 };
3106
3107 /// Type of the expression.
3108 ExpressionTypes ExpressionType;
3109
3110 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3111 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3112 /// in the expression) are replaced by temporary VPValues and the original
3113 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3114 /// as needed (excluding last) to ensure they are only used by other recipes
3115 /// in the expression.
3116 VPExpressionRecipe(ExpressionTypes ExpressionType,
3117 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3118
3119public:
3121 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3123 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3126 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3127 {Ext0, Ext1, Mul, Red}) {}
3130 VPReductionRecipe *Red)
3131 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3132 {Ext0, Ext1, Mul, Sub, Red}) {
3133 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3134 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3135 "Expected an add reduction");
3136 assert(getNumOperands() >= 3 && "Expected at least three operands");
3137 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3138 assert(SubConst && SubConst->getValue() == 0 &&
3139 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3140 }
3141
3143 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3144 for (auto *R : reverse(ExpressionRecipes)) {
3145 if (ExpressionRecipesSeen.insert(R).second)
3146 delete R;
3147 }
3148 for (VPValue *T : LiveInPlaceholders)
3149 delete T;
3150 }
3151
3152 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3153
3154 VPExpressionRecipe *clone() override {
3155 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3156 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3157 for (auto *R : ExpressionRecipes)
3158 NewExpressiondRecipes.push_back(R->clone());
3159 for (auto *New : NewExpressiondRecipes) {
3160 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3161 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3162 // Update placeholder operands in the cloned recipe to use the external
3163 // operands, to be internalized when the cloned expression is constructed.
3164 for (const auto &[Placeholder, OutsideOp] :
3165 zip(LiveInPlaceholders, operands()))
3166 New->replaceUsesOfWith(Placeholder, OutsideOp);
3167 }
3168 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3169 }
3170
3171 /// Return the VPValue to use to infer the result type of the recipe.
3173 unsigned OpIdx =
3174 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3175 : 1;
3176 return getOperand(getNumOperands() - OpIdx);
3177 }
3178
3179 /// Insert the recipes of the expression back into the VPlan, directly before
3180 /// the current recipe. Leaves the expression recipe empty, which must be
3181 /// removed before codegen.
3182 void decompose();
3183
3184 unsigned getVFScaleFactor() const {
3185 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3186 return PR ? PR->getVFScaleFactor() : 1;
3187 }
3188
3189 /// Method for generating code, must not be called as this recipe is abstract.
3190 void execute(VPTransformState &State) override {
3191 llvm_unreachable("recipe must be removed before execute");
3192 }
3193
3195 VPCostContext &Ctx) const override;
3196
3197 /// Returns true if this expression contains recipes that may read from or
3198 /// write to memory.
3199 bool mayReadOrWriteMemory() const;
3200
3201 /// Returns true if this expression contains recipes that may have side
3202 /// effects.
3203 bool mayHaveSideEffects() const;
3204
3205 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3206 bool isSingleScalar() const;
3207
3208protected:
3209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3210 /// Print the recipe.
3211 void printRecipe(raw_ostream &O, const Twine &Indent,
3212 VPSlotTracker &SlotTracker) const override;
3213#endif
3214};
3215
3216/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3217/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3218/// order to merge values that are set under such a branch and feed their uses.
3219/// The phi nodes can be scalar or vector depending on the users of the value.
3220/// This recipe works in concert with VPBranchOnMaskRecipe.
3222public:
3223 /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
3224 /// nodes after merging back from a Branch-on-Mask.
3226 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3227 ~VPPredInstPHIRecipe() override = default;
3228
3230 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3231 }
3232
3233 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3234
3235 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3236 /// retain SSA form.
3237 void execute(VPTransformState &State) override;
3238
3239 /// Return the cost of this VPPredInstPHIRecipe.
3241 VPCostContext &Ctx) const override {
3242 // TODO: Compute accurate cost after retiring the legacy cost model.
3243 return 0;
3244 }
3245
3246 /// Returns true if the recipe uses scalars of operand \p Op.
3247 bool usesScalars(const VPValue *Op) const override {
3249 "Op must be an operand of the recipe");
3250 return true;
3251 }
3252
3253protected:
3254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3255 /// Print the recipe.
3256 void printRecipe(raw_ostream &O, const Twine &Indent,
3257 VPSlotTracker &SlotTracker) const override;
3258#endif
3259};
3260
3261/// A common base class for widening memory operations. An optional mask can be
3262/// provided as the last operand.
3264 public VPIRMetadata {
3265protected:
3267
3268 /// Alignment information for this memory access.
3270
3271 /// Whether the accessed addresses are consecutive.
3273
3274 /// Whether the consecutive accessed addresses are in reverse order.
3276
3277 /// Whether the memory access is masked.
3278 bool IsMasked = false;
3279
3280 void setMask(VPValue *Mask) {
3281 assert(!IsMasked && "cannot re-set mask");
3282 if (!Mask)
3283 return;
3284 addOperand(Mask);
3285 IsMasked = true;
3286 }
3287
3288 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3289 std::initializer_list<VPValue *> Operands,
3290 bool Consecutive, bool Reverse,
3291 const VPIRMetadata &Metadata, DebugLoc DL)
3292 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3294 Reverse(Reverse) {
3295 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3297 "Reversed acccess without VPVectorEndPointerRecipe address?");
3298 }
3299
3300public:
3302 llvm_unreachable("cloning not supported");
3303 }
3304
3305 static inline bool classof(const VPRecipeBase *R) {
3306 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3307 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3308 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3309 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3310 }
3311
3312 static inline bool classof(const VPUser *U) {
3313 auto *R = dyn_cast<VPRecipeBase>(U);
3314 return R && classof(R);
3315 }
3316
3317 /// Return whether the loaded-from / stored-to addresses are consecutive.
3318 bool isConsecutive() const { return Consecutive; }
3319
3320 /// Return whether the consecutive loaded/stored addresses are in reverse
3321 /// order.
3322 bool isReverse() const { return Reverse; }
3323
3324 /// Return the address accessed by this recipe.
3325 VPValue *getAddr() const { return getOperand(0); }
3326
3327 /// Returns true if the recipe is masked.
3328 bool isMasked() const { return IsMasked; }
3329
3330 /// Return the mask used by this recipe. Note that a full mask is represented
3331 /// by a nullptr.
3332 VPValue *getMask() const {
3333 // Mask is optional and therefore the last operand.
3334 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3335 }
3336
3337 /// Returns the alignment of the memory access.
3338 Align getAlign() const { return Alignment; }
3339
3340 /// Generate the wide load/store.
3341 void execute(VPTransformState &State) override {
3342 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3343 }
3344
3345 /// Return the cost of this VPWidenMemoryRecipe.
3346 InstructionCost computeCost(ElementCount VF,
3347 VPCostContext &Ctx) const override;
3348
3350};
3351
3352/// A recipe for widening load operations, using the address to load from and an
3353/// optional mask.
3355 public VPValue {
3357 bool Consecutive, bool Reverse,
3358 const VPIRMetadata &Metadata, DebugLoc DL)
3359 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3360 Reverse, Metadata, DL),
3361 VPValue(this, &Load) {
3362 setMask(Mask);
3363 }
3364
3367 getMask(), Consecutive, Reverse, *this,
3368 getDebugLoc());
3369 }
3370
3371 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3372
3373 /// Generate a wide load or gather.
3374 void execute(VPTransformState &State) override;
3375
3376 /// Returns true if the recipe only uses the first lane of operand \p Op.
3377 bool usesFirstLaneOnly(const VPValue *Op) const override {
3379 "Op must be an operand of the recipe");
3380 // Widened, consecutive load operations only demand the first lane of
3381 // their address.
3382 return Op == getAddr() && isConsecutive();
3383 }
3384
3385protected:
3386#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3387 /// Print the recipe.
3388 void printRecipe(raw_ostream &O, const Twine &Indent,
3389 VPSlotTracker &SlotTracker) const override;
3390#endif
3391};
3392
3393/// A recipe for widening load operations with vector-predication intrinsics,
3394/// using the address to load from, the explicit vector length and an optional
3395/// mask.
3396struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3398 VPValue *Mask)
3399 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3400 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3401 L.getDebugLoc()),
3402 VPValue(this, &getIngredient()) {
3403 setMask(Mask);
3404 }
3405
3406 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3407
3408 /// Return the EVL operand.
3409 VPValue *getEVL() const { return getOperand(1); }
3410
3411 /// Generate the wide load or gather.
3412 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3413
3414 /// Return the cost of this VPWidenLoadEVLRecipe.
3416 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3417
3418 /// Returns true if the recipe only uses the first lane of operand \p Op.
3419 bool usesFirstLaneOnly(const VPValue *Op) const override {
3421 "Op must be an operand of the recipe");
3422 // Widened loads only demand the first lane of EVL and consecutive loads
3423 // only demand the first lane of their address.
3424 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3425 }
3426
3427protected:
3428#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3429 /// Print the recipe.
3430 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3431 VPSlotTracker &SlotTracker) const override;
3432#endif
3433};
3434
3435/// A recipe for widening store operations, using the stored value, the address
3436/// to store to and an optional mask.
3438 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3439 VPValue *Mask, bool Consecutive, bool Reverse,
3440 const VPIRMetadata &Metadata, DebugLoc DL)
3441 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3442 Consecutive, Reverse, Metadata, DL) {
3443 setMask(Mask);
3444 }
3445
3451
3452 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3453
3454 /// Return the value stored by this recipe.
3455 VPValue *getStoredValue() const { return getOperand(1); }
3456
3457 /// Generate a wide store or scatter.
3458 void execute(VPTransformState &State) override;
3459
3460 /// Returns true if the recipe only uses the first lane of operand \p Op.
3461 bool usesFirstLaneOnly(const VPValue *Op) const override {
3463 "Op must be an operand of the recipe");
3464 // Widened, consecutive stores only demand the first lane of their address,
3465 // unless the same operand is also stored.
3466 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3467 }
3468
3469protected:
3470#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3471 /// Print the recipe.
3472 void printRecipe(raw_ostream &O, const Twine &Indent,
3473 VPSlotTracker &SlotTracker) const override;
3474#endif
3475};
3476
3477/// A recipe for widening store operations with vector-predication intrinsics,
3478/// using the value to store, the address to store to, the explicit vector
3479/// length and an optional mask.
3482 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3483 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3484 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3485 S.isReverse(), S, S.getDebugLoc()) {
3486 setMask(Mask);
3487 }
3488
3489 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3490
3491 /// Return the address accessed by this recipe.
3492 VPValue *getStoredValue() const { return getOperand(1); }
3493
3494 /// Return the EVL operand.
3495 VPValue *getEVL() const { return getOperand(2); }
3496
3497 /// Generate the wide store or scatter.
3498 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3499
3500 /// Return the cost of this VPWidenStoreEVLRecipe.
3502 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3503
3504 /// Returns true if the recipe only uses the first lane of operand \p Op.
3505 bool usesFirstLaneOnly(const VPValue *Op) const override {
3507 "Op must be an operand of the recipe");
3508 if (Op == getEVL()) {
3509 assert(getStoredValue() != Op && "unexpected store of EVL");
3510 return true;
3511 }
3512 // Widened, consecutive memory operations only demand the first lane of
3513 // their address, unless the same operand is also stored. The latter can
3514 // happen with opaque pointers.
3515 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3516 }
3517
3518protected:
3519#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3520 /// Print the recipe.
3521 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3522 VPSlotTracker &SlotTracker) const override;
3523#endif
3524};
3525
3526/// Recipe to expand a SCEV expression.
3528 const SCEV *Expr;
3529
3530public:
3532 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3533
3534 ~VPExpandSCEVRecipe() override = default;
3535
3536 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3537
3538 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3539
3540 void execute(VPTransformState &State) override {
3541 llvm_unreachable("SCEV expressions must be expanded before final execute");
3542 }
3543
3544 /// Return the cost of this VPExpandSCEVRecipe.
3546 VPCostContext &Ctx) const override {
3547 // TODO: Compute accurate cost after retiring the legacy cost model.
3548 return 0;
3549 }
3550
3551 const SCEV *getSCEV() const { return Expr; }
3552
3553protected:
3554#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3555 /// Print the recipe.
3556 void printRecipe(raw_ostream &O, const Twine &Indent,
3557 VPSlotTracker &SlotTracker) const override;
3558#endif
3559};
3560
3561/// Canonical scalar induction phi of the vector loop. Starting at the specified
3562/// start value (either 0 or the resume value when vectorizing the epilogue
3563/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3564/// canonical induction variable.
3566public:
3568 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3569
3570 ~VPCanonicalIVPHIRecipe() override = default;
3571
3573 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3574 R->addOperand(getBackedgeValue());
3575 return R;
3576 }
3577
3578 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3579
3580 void execute(VPTransformState &State) override {
3581 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3582 "scalar phi recipe");
3583 }
3584
3585 /// Returns the scalar type of the induction.
3587 return getStartValue()->getLiveInIRValue()->getType();
3588 }
3589
3590 /// Returns true if the recipe only uses the first lane of operand \p Op.
3591 bool usesFirstLaneOnly(const VPValue *Op) const override {
3593 "Op must be an operand of the recipe");
3594 return true;
3595 }
3596
3597 /// Returns true if the recipe only uses the first part of operand \p Op.
3598 bool usesFirstPartOnly(const VPValue *Op) const override {
3600 "Op must be an operand of the recipe");
3601 return true;
3602 }
3603
3604 /// Return the cost of this VPCanonicalIVPHIRecipe.
3606 VPCostContext &Ctx) const override {
3607 // For now, match the behavior of the legacy cost model.
3608 return 0;
3609 }
3610
3611protected:
3612#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3613 /// Print the recipe.
3614 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3615 VPSlotTracker &SlotTracker) const override;
3616#endif
3617};
3618
3619/// A recipe for generating the active lane mask for the vector loop that is
3620/// used to predicate the vector operations.
3622public:
3624 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3625 DL) {}
3626
3627 ~VPActiveLaneMaskPHIRecipe() override = default;
3628
3631 if (getNumOperands() == 2)
3632 R->addOperand(getOperand(1));
3633 return R;
3634 }
3635
3636 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3637
3638 /// Generate the active lane mask phi of the vector loop.
3639 void execute(VPTransformState &State) override;
3640
3641protected:
3642#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3643 /// Print the recipe.
3644 void printRecipe(raw_ostream &O, const Twine &Indent,
3645 VPSlotTracker &SlotTracker) const override;
3646#endif
3647};
3648
3649/// A recipe for generating the phi node for the current index of elements,
3650/// adjusted in accordance with EVL value. It starts at the start value of the
3651/// canonical induction and gets incremented by EVL in each iteration of the
3652/// vector loop.
3654public:
3656 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3657
3658 ~VPEVLBasedIVPHIRecipe() override = default;
3659
3661 llvm_unreachable("cloning not implemented yet");
3662 }
3663
3664 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3665
3666 void execute(VPTransformState &State) override {
3667 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3668 "scalar phi recipe");
3669 }
3670
3671 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3673 VPCostContext &Ctx) const override {
3674 // For now, match the behavior of the legacy cost model.
3675 return 0;
3676 }
3677
3678 /// Returns true if the recipe only uses the first lane of operand \p Op.
3679 bool usesFirstLaneOnly(const VPValue *Op) const override {
3681 "Op must be an operand of the recipe");
3682 return true;
3683 }
3684
3685protected:
3686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3687 /// Print the recipe.
3688 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3689 VPSlotTracker &SlotTracker) const override;
3690#endif
3691};
3692
3693/// A Recipe for widening the canonical induction variable of the vector loop.
3695 public VPUnrollPartAccessor<1> {
3696public:
3698 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3699
3700 ~VPWidenCanonicalIVRecipe() override = default;
3701
3706
3707 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3708
3709 /// Generate a canonical vector induction variable of the vector loop, with
3710 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3711 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3712 void execute(VPTransformState &State) override;
3713
3714 /// Return the cost of this VPWidenCanonicalIVRecipe.
3716 VPCostContext &Ctx) const override {
3717 // TODO: Compute accurate cost after retiring the legacy cost model.
3718 return 0;
3719 }
3720
3721protected:
3722#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3723 /// Print the recipe.
3724 void printRecipe(raw_ostream &O, const Twine &Indent,
3725 VPSlotTracker &SlotTracker) const override;
3726#endif
3727};
3728
3729/// A recipe for converting the input value \p IV value to the corresponding
3730/// value of an IV with different start and step values, using Start + IV *
3731/// Step.
3733 /// Kind of the induction.
3735 /// If not nullptr, the floating point induction binary operator. Must be set
3736 /// for floating point inductions.
3737 const FPMathOperator *FPBinOp;
3738
3739 /// Name to use for the generated IR instruction for the derived IV.
3740 std::string Name;
3741
3742public:
3744 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3745 const Twine &Name = "")
3747 IndDesc.getKind(),
3748 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3749 Start, CanonicalIV, Step, Name) {}
3750
3752 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3753 VPValue *Step, const Twine &Name = "")
3754 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3755 FPBinOp(FPBinOp), Name(Name.str()) {}
3756
3757 ~VPDerivedIVRecipe() override = default;
3758
3760 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3761 getStepValue());
3762 }
3763
3764 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3765
3766 /// Generate the transformed value of the induction at offset StartValue (1.
3767 /// operand) + IV (2. operand) * StepValue (3. operand).
3768 void execute(VPTransformState &State) override;
3769
3770 /// Return the cost of this VPDerivedIVRecipe.
3772 VPCostContext &Ctx) const override {
3773 // TODO: Compute accurate cost after retiring the legacy cost model.
3774 return 0;
3775 }
3776
3778 return getStartValue()->getLiveInIRValue()->getType();
3779 }
3780
3781 VPValue *getStartValue() const { return getOperand(0); }
3782 VPValue *getStepValue() const { return getOperand(2); }
3783
3784 /// Returns true if the recipe only uses the first lane of operand \p Op.
3785 bool usesFirstLaneOnly(const VPValue *Op) const override {
3787 "Op must be an operand of the recipe");
3788 return true;
3789 }
3790
3791protected:
3792#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3793 /// Print the recipe.
3794 void printRecipe(raw_ostream &O, const Twine &Indent,
3795 VPSlotTracker &SlotTracker) const override;
3796#endif
3797};
3798
3799/// A recipe for handling phi nodes of integer and floating-point inductions,
3800/// producing their scalar values.
3802 public VPUnrollPartAccessor<3> {
3803 Instruction::BinaryOps InductionOpcode;
3804
3805public:
3808 DebugLoc DL)
3809 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3810 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3811 InductionOpcode(Opcode) {}
3812
3814 VPValue *Step, VPValue *VF,
3817 IV, Step, VF, IndDesc.getInductionOpcode(),
3818 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3819 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3820 : FastMathFlags(),
3821 DL) {}
3822
3823 ~VPScalarIVStepsRecipe() override = default;
3824
3826 return new VPScalarIVStepsRecipe(
3827 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3829 getDebugLoc());
3830 }
3831
3832 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3833 /// this is only accurate after the VPlan has been unrolled.
3834 bool isPart0() const { return getUnrollPart(*this) == 0; }
3835
3836 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3837
3838 /// Generate the scalarized versions of the phi node as needed by their users.
3839 void execute(VPTransformState &State) override;
3840
3841 /// Return the cost of this VPScalarIVStepsRecipe.
3843 VPCostContext &Ctx) const override {
3844 // TODO: Compute accurate cost after retiring the legacy cost model.
3845 return 0;
3846 }
3847
3848 VPValue *getStepValue() const { return getOperand(1); }
3849
3850 /// Returns true if the recipe only uses the first lane of operand \p Op.
3851 bool usesFirstLaneOnly(const VPValue *Op) const override {
3853 "Op must be an operand of the recipe");
3854 return true;
3855 }
3856
3857protected:
3858#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3859 /// Print the recipe.
3860 void printRecipe(raw_ostream &O, const Twine &Indent,
3861 VPSlotTracker &SlotTracker) const override;
3862#endif
3863};
3864
3865/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3866/// types implementing VPPhiAccessors. Used by isa<> & co.
3868 static inline bool isPossible(const VPRecipeBase *f) {
3869 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3871 }
3872};
3873/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3874/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3875template <typename SrcTy>
3876struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3877
3879
3880 /// doCast is used by cast<>.
3881 static inline VPPhiAccessors *doCast(SrcTy R) {
3882 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3883 switch (R->getVPDefID()) {
3884 case VPDef::VPInstructionSC:
3885 return cast<VPPhi>(R);
3886 case VPDef::VPIRInstructionSC:
3887 return cast<VPIRPhi>(R);
3888 case VPDef::VPWidenPHISC:
3889 return cast<VPWidenPHIRecipe>(R);
3890 default:
3891 return cast<VPHeaderPHIRecipe>(R);
3892 }
3893 }());
3894 }
3895
3896 /// doCastIfPossible is used by dyn_cast<>.
3897 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3898 if (!Self::isPossible(f))
3899 return nullptr;
3900 return doCast(f);
3901 }
3902};
3903template <>
3906template <>
3909
3910/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
3911/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
3912namespace detail {
3913template <typename DstTy, typename RecipeBasePtrTy>
3914static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
3915 switch (R->getVPDefID()) {
3916 case VPDef::VPInstructionSC:
3917 return cast<VPInstruction>(R);
3918 case VPDef::VPWidenSC:
3919 return cast<VPWidenRecipe>(R);
3920 case VPDef::VPWidenCastSC:
3921 return cast<VPWidenCastRecipe>(R);
3922 case VPDef::VPWidenIntrinsicSC:
3924 case VPDef::VPWidenCallSC:
3925 return cast<VPWidenCallRecipe>(R);
3926 case VPDef::VPWidenSelectSC:
3927 return cast<VPWidenSelectRecipe>(R);
3928 case VPDef::VPReplicateSC:
3929 return cast<VPReplicateRecipe>(R);
3930 case VPDef::VPInterleaveSC:
3931 case VPDef::VPInterleaveEVLSC:
3932 return cast<VPInterleaveBase>(R);
3933 case VPDef::VPWidenLoadSC:
3934 case VPDef::VPWidenLoadEVLSC:
3935 case VPDef::VPWidenStoreSC:
3936 case VPDef::VPWidenStoreEVLSC:
3937 return cast<VPWidenMemoryRecipe>(R);
3938 default:
3939 llvm_unreachable("invalid recipe for VPIRMetadata cast");
3940 }
3941}
3942} // namespace detail
3943
3944/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
3945/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
3946template <typename DstTy, typename SrcTy>
3947struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
3948 static inline bool isPossible(SrcTy R) {
3949 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
3950 // also handled in castToVPIRMetadata.
3956 }
3957
3958 using RetTy = DstTy *;
3959
3960 /// doCast is used by cast<>.
3961 static inline RetTy doCast(SrcTy R) {
3963 }
3964
3965 /// doCastIfPossible is used by dyn_cast<>.
3966 static inline RetTy doCastIfPossible(SrcTy R) {
3967 if (!isPossible(R))
3968 return nullptr;
3969 return doCast(R);
3970 }
3971};
3972template <>
3975template <>
3978
3979/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3980/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3981/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3982class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3983 friend class VPlan;
3984
3985 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3986 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3987 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3988 if (Recipe)
3989 appendRecipe(Recipe);
3990 }
3991
3992public:
3994
3995protected:
3996 /// The VPRecipes held in the order of output instructions to generate.
3998
/// Construct a block named \p Name whose block ID is \p BlockSC instead of
/// VPBasicBlockSC. VPIRBasicBlock uses this to pass VPIRBasicBlockSC.
3999 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4000 : VPBlockBase(BlockSC, Name.str()) {}
4001
4002public:
4003 ~VPBasicBlock() override {
4004 while (!Recipes.empty())
4005 Recipes.pop_back();
4006 }
4007
4008 /// Instruction iterators...
4013
4014 //===--------------------------------------------------------------------===//
4015 /// Recipe iterator methods
4016 ///
4017 inline iterator begin() { return Recipes.begin(); }
4018 inline const_iterator begin() const { return Recipes.begin(); }
4019 inline iterator end() { return Recipes.end(); }
4020 inline const_iterator end() const { return Recipes.end(); }
4021
4022 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4023 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4024 inline reverse_iterator rend() { return Recipes.rend(); }
4025 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4026
4027 inline size_t size() const { return Recipes.size(); }
4028 inline bool empty() const { return Recipes.empty(); }
4029 inline const VPRecipeBase &front() const { return Recipes.front(); }
4030 inline VPRecipeBase &front() { return Recipes.front(); }
4031 inline const VPRecipeBase &back() const { return Recipes.back(); }
4032 inline VPRecipeBase &back() { return Recipes.back(); }
4033
4034 /// Returns a reference to the list of recipes.
4036
4037 /// Returns a pointer to a member of the recipe list.
4038 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4039 return &VPBasicBlock::Recipes;
4040 }
4041
4042 /// Method to support type inquiry through isa, cast, and dyn_cast.
4043 static inline bool classof(const VPBlockBase *V) {
4044 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4045 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4046 }
4047
/// Add \p Recipe to this block's recipe list at position \p InsertPt and make
/// this block its parent. \p Recipe must be non-null and must not already
/// have a parent block (both are asserted).
4048 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4049 assert(Recipe && "No recipe to append.");
4050 assert(!Recipe->Parent && "Recipe already in VPlan");
4051 Recipe->Parent = this;
4052 Recipes.insert(InsertPt, Recipe);
4053 }
4054
4055 /// Augment the existing recipes of a VPBasicBlock with an additional
4056 /// \p Recipe as the last recipe.
4057 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4058
4059 /// The method which generates the output IR instructions that correspond to
4060 /// this VPBasicBlock, thereby "executing" the VPlan.
4061 void execute(VPTransformState *State) override;
4062
4063 /// Return the cost of this VPBasicBlock.
4064 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4065
4066 /// Return the position of the first non-phi node recipe in the block.
4067 iterator getFirstNonPhi();
4068
4069 /// Returns an iterator range over the PHI-like recipes in the block.
4073
4074 /// Split current block at \p SplitAt by inserting a new block between the
4075 /// current block and its successors and moving all recipes starting at
4076 /// SplitAt to the new block. Returns the new block.
4077 VPBasicBlock *splitAt(iterator SplitAt);
4078
4079 VPRegionBlock *getEnclosingLoopRegion();
4080 const VPRegionBlock *getEnclosingLoopRegion() const;
4081
4082#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4083 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4084 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4085 ///
4086 /// Note that the numbering is applied to the whole VPlan, so printing
4087 /// individual blocks is consistent with the whole VPlan printing.
4088 void print(raw_ostream &O, const Twine &Indent,
4089 VPSlotTracker &SlotTracker) const override;
4090 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4091#endif
4092
4093 /// If the block has multiple successors, return the branch recipe terminating
4094 /// the block. If there is no successor or only a single one, return nullptr.
4095 VPRecipeBase *getTerminator();
4096 const VPRecipeBase *getTerminator() const;
4097
4098 /// Returns true if the block is exiting its parent region.
4099 bool isExiting() const;
4100
4101 /// Clone the current block and its recipes, without updating the operands of
4102 /// the cloned recipes.
4103 VPBasicBlock *clone() override;
4104
4105 /// Returns the predecessor block at index \p Idx with the predecessors as per
4106 /// the corresponding plain CFG. If the block is an entry block to a region,
4107 /// the first predecessor is the single predecessor of a region, and the
4108 /// second predecessor is the exiting block of the region.
4109 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4110
4111protected:
4112 /// Execute the recipes in the IR basic block \p BB.
4113 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4114
4115 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4116 /// generated for this VPBB.
4117 void connectToPredecessors(VPTransformState &State);
4118
4119private:
4120 /// Create an IR BasicBlock to hold the output instructions generated by this
4121 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4122 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4123};
4124
4125inline const VPBasicBlock *
4127 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4128}
4129
4130/// A special type of VPBasicBlock that wraps an existing IR basic block.
4131/// Recipes of the block get added before the first non-phi instruction in the
4132/// wrapped block.
4133/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4134/// preheader block.
4135class VPIRBasicBlock : public VPBasicBlock {
4136 friend class VPlan;
4137
/// The IR basic block wrapped by this VPIRBasicBlock.
4138 BasicBlock *IRBB;
4139
4140 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
/// The block is named "ir-bb<" + the name of \p IRBB + ">" and gets the
/// VPIRBasicBlockSC block ID. \p IRBB is dereferenced, so it must be non-null.
4141 VPIRBasicBlock(BasicBlock *IRBB)
4142 : VPBasicBlock(VPIRBasicBlockSC,
4143 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4144 IRBB(IRBB) {}
4145
4146public:
4147 ~VPIRBasicBlock() override = default;
4148
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// only blocks whose ID is VPIRBasicBlockSC.
4149 static inline bool classof(const VPBlockBase *V) {
4150 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4151 }
4152
4153 /// The method which generates the output IR instructions that correspond to
4154 /// this VPBasicBlock, thereby "executing" the VPlan.
4155 void execute(VPTransformState *State) override;
4156
/// Clone this block; the result is returned as a VPIRBasicBlock. Defined out
/// of line.
4157 VPIRBasicBlock *clone() override;
4158
/// Return the wrapped IR basic block.
4159 BasicBlock *getIRBasicBlock() const { return IRBB; }
4160};
4161
4162/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4163/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4164/// A VPRegionBlock may indicate that its contents are to be replicated several
4165/// times. This is designed to support predicated scalarization, in which a
4166/// scalar if-then code structure needs to be generated VF * UF times. Having
4167/// this replication indicator helps to keep a single model for multiple
4168/// candidate VF's. The actual replication takes place only once the desired VF
4169/// and UF have been determined.
4170class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4171 friend class VPlan;
4172
4173 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4174 VPBlockBase *Entry;
4175
4176 /// Hold the Single Exiting block of the SESE region modelled by the
4177 /// VPRegionBlock.
4178 VPBlockBase *Exiting;
4179
4180 /// An indicator whether this region is to generate multiple replicated
4181 /// instances of output IR corresponding to its VPBlockBases.
4182 bool IsReplicator;
4183
4184 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4185 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4186 const std::string &Name = "", bool IsReplicator = false)
4187 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4188 IsReplicator(IsReplicator) {
4189 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4190 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4191 Entry->setParent(this);
4192 Exiting->setParent(this);
4193 }
4194 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4195 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4196 IsReplicator(IsReplicator) {}
4197
4198public:
4199 ~VPRegionBlock() override = default;
4200
4201 /// Method to support type inquiry through isa, cast, and dyn_cast.
4202 static inline bool classof(const VPBlockBase *V) {
4203 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4204 }
4205
4206 const VPBlockBase *getEntry() const { return Entry; }
4207 VPBlockBase *getEntry() { return Entry; }
4208
4209 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4210 /// EntryBlock must have no predecessors.
4211 void setEntry(VPBlockBase *EntryBlock) {
4212 assert(EntryBlock->getPredecessors().empty() &&
4213 "Entry block cannot have predecessors.");
4214 Entry = EntryBlock;
4215 EntryBlock->setParent(this);
4216 }
4217
4218 const VPBlockBase *getExiting() const { return Exiting; }
4219 VPBlockBase *getExiting() { return Exiting; }
4220
4221 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4222 /// ExitingBlock must have no successors.
4223 void setExiting(VPBlockBase *ExitingBlock) {
4224 assert(ExitingBlock->getSuccessors().empty() &&
4225 "Exit block cannot have successors.");
4226 Exiting = ExitingBlock;
4227 ExitingBlock->setParent(this);
4228 }
4229
4230 /// Returns the pre-header VPBasicBlock of the loop region.
4232 assert(!isReplicator() && "should only get pre-header of loop regions");
4233 return getSinglePredecessor()->getExitingBasicBlock();
4234 }
4235
4236 /// An indicator whether this region is to generate multiple replicated
4237 /// instances of output IR corresponding to its VPBlockBases.
4238 bool isReplicator() const { return IsReplicator; }
4239
4240 /// The method which generates the output IR instructions that correspond to
4241 /// this VPRegionBlock, thereby "executing" the VPlan.
4242 void execute(VPTransformState *State) override;
4243
4244 /// Return the cost of this region.
4245 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4246
4247#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4248 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4249 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4250 /// consecutive numbers.
4251 ///
4252 /// Note that the numbering is applied to the whole VPlan, so printing
4253 /// individual regions is consistent with the whole VPlan printing.
4254 void print(raw_ostream &O, const Twine &Indent,
4255 VPSlotTracker &SlotTracker) const override;
4256 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4257#endif
4258
4259 /// Clone all blocks in the single-entry single-exit region of the block and
4260 /// their recipes without updating the operands of the cloned recipes.
4261 VPRegionBlock *clone() override;
4262
4263 /// Remove the current region from its VPlan, connecting its predecessor to
4264 /// its entry, and its exiting block to its successor.
4265 void dissolveToCFGLoop();
4266
4267 /// Returns the canonical induction recipe of the region.
4269 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4270 if (EntryVPBB->empty()) {
4271 // VPlan native path. TODO: Unify both code paths.
4272 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4273 }
4274 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4275 }
4277 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4278 }
4279
4280 /// Return the type of the canonical IV for loop regions.
4281 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4282 const Type *getCanonicalIVType() const {
4283 return getCanonicalIV()->getScalarType();
4284 }
4285};
4286
4288 return getParent()->getParent();
4289}
4290
4292 return getParent()->getParent();
4293}
4294
4295/// VPlan models a candidate for vectorization, encoding various decisions taken
4296/// to produce efficient output IR, including which branches, basic-blocks and
4297/// output IR instructions to generate, and their cost. VPlan holds a
4298/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4299/// VPBasicBlock.
4300class VPlan {
4301 friend class VPlanPrinter;
4302 friend class VPSlotTracker;
4303
4304 /// VPBasicBlock corresponding to the original preheader. Used to place
4305 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4306 /// rest of VPlan execution.
4307 /// When this VPlan is used for the epilogue vector loop, the entry will be
4308 /// replaced by a new entry block created during skeleton creation.
4309 VPBasicBlock *Entry;
4310
4311 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4312 VPIRBasicBlock *ScalarHeader;
4313
4314 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4315 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4316 /// e.g. if the scalar epilogue always executes.
4318
4319 /// Holds the VFs applicable to this VPlan.
4321
4322 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4323 /// any UF.
4325
4326 /// Holds the name of the VPlan, for printing.
4327 std::string Name;
4328
4329 /// Represents the trip count of the original loop, for folding
4330 /// the tail.
4331 VPValue *TripCount = nullptr;
4332
4333 /// Represents the backedge taken count of the original loop, for folding
4334 /// the tail. It equals TripCount - 1.
4335 VPValue *BackedgeTakenCount = nullptr;
4336
4337 /// Represents the vector trip count.
4338 VPValue VectorTripCount;
4339
4340 /// Represents the vectorization factor of the loop.
4341 VPValue VF;
4342
4343 /// Represents the loop-invariant VF * UF of the vector loop region.
4344 VPValue VFxUF;
4345
4346 /// Contains all the external definitions created for this VPlan, as a mapping
4347 /// from IR Values to VPValues.
4349
4350 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4351 /// VPlan is destroyed.
4352 SmallVector<VPBlockBase *> CreatedBlocks;
4353
4354 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4355 /// wrapping the original header of the scalar loop.
4356 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4357 : Entry(Entry), ScalarHeader(ScalarHeader) {
4358 Entry->setPlan(this);
4359 assert(ScalarHeader->getNumSuccessors() == 0 &&
4360 "scalar header must be a leaf node");
4361 }
4362
4363public:
4364 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4365 /// original preheader and scalar header of \p L, to be used as entry and
4366 /// scalar header blocks of the new VPlan.
4367 VPlan(Loop *L);
4368
4369 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4370 /// wrapping \p ScalarHeaderBB as its scalar header.
4371 VPlan(BasicBlock *ScalarHeaderBB) {
4372 setEntry(createVPBasicBlock("preheader"));
4373 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4374 }
4375
4377
4379 Entry = VPBB;
4380 VPBB->setPlan(this);
4381 }
4382
4383 /// Generate the IR code for this VPlan.
4384 void execute(VPTransformState *State);
4385
4386 /// Return the cost of this plan.
4388
4389 VPBasicBlock *getEntry() { return Entry; }
4390 const VPBasicBlock *getEntry() const { return Entry; }
4391
4392 /// Returns the preheader of the vector loop region, if one exists, or null
4393 /// otherwise.
4395 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4396 return VectorRegion
4397 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4398 : nullptr;
4399 }
4400
4401 /// Returns the VPRegionBlock of the vector loop.
4404
4405 /// Returns the 'middle' block of the plan, that is the block that selects
4406 /// whether to execute the scalar tail loop or the exit block from the loop
4407 /// latch. If there is an early exit from the vector loop, the middle block
4408 /// conceptually has the early exit block as third successor, split across 2
4409 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4410 /// tail loop or the exit block. If the scalar tail loop or exit block are
4411 /// known to always execute, the middle block may branch directly to that
4412 /// block. This function cannot be called once the vector loop region has been
4413 /// removed.
4415 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4416 assert(
4417 LoopRegion &&
4418 "cannot call the function after vector loop region has been removed");
4419 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4420 if (RegionSucc->getSingleSuccessor() ||
4421 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4422 return RegionSucc;
4423 // There is an early exit. The successor of RegionSucc is the middle block.
4424 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4425 }
4426
4428 return const_cast<VPlan *>(this)->getMiddleBlock();
4429 }
4430
4431 /// Return the VPBasicBlock for the preheader of the scalar loop.
4433 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4434 }
4435
4436 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4437 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4438
4439 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4440 /// the original scalar loop.
4441 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4442
4443 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4444 /// exit block.
4446
4447 /// Returns true if \p VPBB is an exit block.
4448 bool isExitBlock(VPBlockBase *VPBB);
4449
4450 /// The trip count of the original loop.
4452 assert(TripCount && "trip count needs to be set before accessing it");
4453 return TripCount;
4454 }
4455
4456 /// Set the trip count assuming it is currently null; if it is not - use
4457 /// resetTripCount().
4458 void setTripCount(VPValue *NewTripCount) {
4459 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4460 TripCount = NewTripCount;
4461 }
4462
4463 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4464 /// the original trip count have been replaced.
4465 void resetTripCount(VPValue *NewTripCount) {
4466 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4467 "TripCount must be set when resetting");
4468 TripCount = NewTripCount;
4469 }
4470
4471 /// The backedge taken count of the original loop.
4473 if (!BackedgeTakenCount)
4474 BackedgeTakenCount = new VPValue();
4475 return BackedgeTakenCount;
4476 }
4477 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4478
4479 /// The vector trip count.
4480 VPValue &getVectorTripCount() { return VectorTripCount; }
4481
4482 /// Returns the VF of the vector loop region.
4483 VPValue &getVF() { return VF; };
4484 const VPValue &getVF() const { return VF; };
4485
4486 /// Returns VF * UF of the vector loop region.
4487 VPValue &getVFxUF() { return VFxUF; }
4488
4491 }
4492
4493 void addVF(ElementCount VF) { VFs.insert(VF); }
4494
4496 assert(hasVF(VF) && "Cannot set VF not already in plan");
4497 VFs.clear();
4498 VFs.insert(VF);
4499 }
4500
4501 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4502 bool hasScalableVF() const {
4503 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4504 }
4505
4506 /// Returns an iterator range over all VFs of the plan.
4509 return VFs;
4510 }
4511
/// Returns true if the plan has exactly one VF and that VF is scalar. Asserts
/// that this agrees with whether the plan contains the fixed VF of 1.
4512 bool hasScalarVFOnly() const {
4513 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4514 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4515 "Plan with scalar VF should only have a single VF");
4516 return HasScalarVFOnly;
4517 }
4518
4519 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4520
/// Returns the UF of the plan. Asserts that exactly one UF is set.
4521 unsigned getUF() const {
4522 assert(UFs.size() == 1 && "Expected a single UF");
4523 return UFs[0];
4524 }
4525
/// Make \p UF the only UF of the plan. \p UF must already be applicable to
/// the plan, i.e. hasUF(UF) must hold (asserted).
4526 void setUF(unsigned UF) {
4527 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4528 UFs.clear();
4529 UFs.insert(UF);
4530 }
4531
4532 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4533 /// concrete UF.
4534 bool isUnrolled() const { return UFs.size() == 1; }
4535
4536 /// Return a string with the name of the plan and the applicable VFs and UFs.
4537 std::string getName() const;
4538
4539 void setName(const Twine &newName) { Name = newName.str(); }
4540
4541 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4542 /// yet) for \p V.
4544 assert(V && "Trying to get or add the VPValue of a null Value");
4545 auto [It, Inserted] = LiveIns.try_emplace(V);
4546 if (Inserted) {
4547 VPValue *VPV = new VPValue(V);
4548 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4549 It->second = VPV;
4550 }
4551
4552 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4553 return It->second;
4554 }
4555
4556 /// Return a VPValue wrapping i1 true.
4557 VPValue *getTrue() { return getConstantInt(1, 1); }
4558
4559 /// Return a VPValue wrapping i1 false.
4560 VPValue *getFalse() { return getConstantInt(1, 0); }
4561
4562 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4563 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4564 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4565 }
4566
4567 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4569 bool IsSigned = false) {
4570 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4571 }
4572
4573 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4575 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4576 }
4577
4578 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4579 VPValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4580
4581 /// Return the list of live-in VPValues available in the VPlan.
4582 auto getLiveIns() const { return LiveIns.values(); }
4583
4584#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4585 /// Print the live-ins of this VPlan to \p O.
4586 void printLiveIns(raw_ostream &O) const;
4587
4588 /// Print this VPlan to \p O.
4589 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4590
4591 /// Print this VPlan in DOT format to \p O.
4592 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4593
4594 /// Dump the plan to stderr (for debugging).
4595 LLVM_DUMP_METHOD void dump() const;
4596#endif
4597
4598 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4599 /// recipes to refer to the clones, and return it.
4601
4602 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4603 /// present. The returned block is owned by the VPlan and deleted once the
4604 /// VPlan is destroyed.
4606 VPRecipeBase *Recipe = nullptr) {
4607 auto *VPB = new VPBasicBlock(Name, Recipe);
4608 CreatedBlocks.push_back(VPB);
4609 return VPB;
4610 }
4611
4612 /// Create a new loop region with \p Name and entry and exiting blocks set
4613 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4614 /// owned by the VPlan and deleted once the VPlan is destroyed.
4615 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4616 VPBlockBase *Entry = nullptr,
4617 VPBlockBase *Exiting = nullptr) {
4618 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4619 : new VPRegionBlock(Name);
4620 CreatedBlocks.push_back(VPB);
4621 return VPB;
4622 }
4623
4624 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4625 /// returned block is owned by the VPlan and deleted once the VPlan is
4626 /// destroyed.
4628 const std::string &Name = "") {
4629 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4630 CreatedBlocks.push_back(VPB);
4631 return VPB;
4632 }
4633
4634 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4635 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4636 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4638
4639 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4640 /// instructions in \p IRBB, except its terminator which is managed by the
4641 /// successors of the block in VPlan. The returned block is owned by the VPlan
4642 /// and deleted once the VPlan is destroyed.
4644
4645 /// Returns true if the VPlan is based on a loop with an early exit. That is
4646 /// the case if the VPlan has either more than one exit block or a single exit
4647 /// block with multiple predecessors (one for the exit via the latch and one
4648 /// via the other early exit).
4649 bool hasEarlyExit() const {
4650 return count_if(ExitBlocks,
4651 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4652 1 ||
4653 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4654 }
4655
4656 /// Returns true if the scalar tail may execute after the vector loop. Note
4657 /// that this relies on unneeded branches to the scalar tail loop being
4658 /// removed.
4659 bool hasScalarTail() const {
4660 return !(!getScalarPreheader()->hasPredecessors() ||
4662 }
4663};
4664
4665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print \p Plan to \p OS via VPlan::print and return \p OS so that calls can
/// be chained.
4666inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4667 Plan.print(OS);
4668 return OS;
4669}
4670#endif
4671
4672} // end namespace llvm
4673
4674#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:509
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:225
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3629
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3623
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3982
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4010
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4057
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4012
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4009
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4035
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3993
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3999
iterator end()
Definition VPlan.h:4019
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4017
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4011
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4070
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
~VPBasicBlock() override
Definition VPlan.h:4003
const_reverse_iterator rbegin() const
Definition VPlan.h:4023
reverse_iterator rend()
Definition VPlan.h:4024
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3997
VPRecipeBase & back()
Definition VPlan.h:4032
const VPRecipeBase & front() const
Definition VPlan.h:4029
const_iterator begin() const
Definition VPlan.h:4018
VPRecipeBase & front()
Definition VPlan.h:4030
const VPRecipeBase & back() const
Definition VPlan.h:4031
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4048
bool empty() const
Definition VPlan.h:4028
const_iterator end() const
Definition VPlan.h:4020
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4043
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4038
reverse_iterator rbegin()
Definition VPlan.h:4022
friend class VPlan
Definition VPlan.h:3983
size_t size() const
Definition VPlan.h:4027
const_reverse_iterator rend() const
Definition VPlan.h:4025
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2559
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2564
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2554
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2575
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2584
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2541
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2536
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2570
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2550
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:81
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:300
VPRegionBlock * getParent()
Definition VPlan.h:173
VPBlocksTy & getPredecessors()
Definition VPlan.h:205
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:202
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:370
void setName(const Twine &newName)
Definition VPlan.h:166
size_t getNumSuccessors() const
Definition VPlan.h:219
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:201
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:223
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:322
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:160
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:258
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:335
size_t getNumPredecessors() const
Definition VPlan.h:220
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:291
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:328
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:204
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:158
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const VPRegionBlock * getParent() const
Definition VPlan.h:174
const std::string & getName() const
Definition VPlan.h:164
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:310
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:248
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:282
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:215
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:242
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:307
friend class VPBlockUtils
Definition VPlan.h:82
unsigned getVPBlockID() const
Definition VPlan.h:171
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:349
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:314
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:150
VPBlocksTy & getSuccessors()
Definition VPlan.h:199
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:271
void setParent(VPRegionBlock *P)
Definition VPlan.h:184
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:264
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:209
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:198
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3055
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3039
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3063
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3036
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3565
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3591
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3572
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3598
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3567
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3586
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3580
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3605
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:305
friend class VPValue
Definition VPlanValue.h:306
VPDef(const unsigned char SC)
Definition VPlanValue.h:384
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3771
VPValue * getStepValue() const
Definition VPlan.h:3782
Type * getScalarType() const
Definition VPlan.h:3777
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3759
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3751
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3785
VPValue * getStartValue() const
Definition VPlan.h:3781
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3743
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3679
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3660
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3666
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3672
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3655
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3540
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3545
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3531
const SCEV * getSCEV() const
Definition VPlan.h:3551
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3536
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3190
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3172
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3154
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3142
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3128
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3120
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3124
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3184
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3122
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2063
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2076
static bool classof(const VPValue *V)
Definition VPlan.h:2073
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2099
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2104
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2088
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2096
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2069
VPValue * getStartValue() const
Definition VPlan.h:2091
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2108
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2058
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1772
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1789
unsigned getOpcode() const
Definition VPlan.h:1785
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1766
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4135
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4159
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4149
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4136
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
Class to record and manage LLVM IR flags.
Definition VPlan.h:609
FastMathFlagsTy FMFs
Definition VPlan.h:680
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:740
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:732
WrapFlagsTy WrapFlags
Definition VPlan.h:674
CmpInst::Predicate CmpPredicate
Definition VPlan.h:673
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:726
GEPNoWrapFlags GEPFlags
Definition VPlan.h:678
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:858
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:675
CmpInst::Predicate getPredicate() const
Definition VPlan.h:835
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:865
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:749
ExactFlagsTy ExactFlags
Definition VPlan.h:677
bool hasNoSignedWrap() const
Definition VPlan.h:884
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:895
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:735
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:738
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:743
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:723
bool isNonNeg() const
Definition VPlan.h:867
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:850
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:853
DisjointFlagsTy DisjointFlags
Definition VPlan.h:676
unsigned AllFlags
Definition VPlan.h:682
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:841
bool hasNoUnsignedWrap() const
Definition VPlan.h:873
FCmpFlagsTy FCmpFlags
Definition VPlan.h:681
NonNegFlagsTy NonNegFlags
Definition VPlan.h:679
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:759
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:795
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:746
VPIRFlags(Instruction &I)
Definition VPlan.h:688
Instruction & getInstruction() const
Definition VPlan.h:1451
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1459
void extractLastLaneOfLastPartOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the last part of the operand using Builder.
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1438
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1465
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1453
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1426
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:982
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1018
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:990
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1002
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1265
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1306
static bool classof(const VPUser *R)
Definition VPlan.h:1291
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1273
Type * getResultType() const
Definition VPlan.h:1312
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1295
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1036
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1178
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1131
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1076
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1121
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1134
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1073
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1125
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1068
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1065
@ VScale
Returns the value for vscale.
Definition VPlan.h:1136
@ CanonicalIVIncrementForPart
Definition VPlan.h:1056
bool hasResult() const
Definition VPlan.h:1202
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1242
unsigned getOpcode() const
Definition VPlan.h:1186
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1245
friend class VPlanSlp
Definition VPlan.h:1037
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2670
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2676
static bool classof(const VPUser *U)
Definition VPlan.h:2652
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2619
Instruction * getInsertPos() const
Definition VPlan.h:2674
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2647
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2672
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2664
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2693
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2658
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2746
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2774
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2768
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2781
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2761
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2748
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2704
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2731
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2714
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2725
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2706
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1324
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1346
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1341
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4126
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1366
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1333
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1351
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1355
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3247
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3229
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3240
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3225
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:387
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:474
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4287
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:485
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:408
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:479
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:454
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:389
const VPBasicBlock * getParent() const
Definition VPlan.h:409
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:459
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:398
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2931
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2910
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2934
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2921
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2497
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2483
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2462
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2476
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2509
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2491
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2500
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2514
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2451
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2506
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2494
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:2797
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:2806
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2873
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2842
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2857
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2884
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2886
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2869
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2820
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2871
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2827
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2875
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2882
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:2877
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2836
static bool classof(const VPUser *U)
Definition VPlan.h:2847
static bool classof(const VPValue *VPV)
Definition VPlan.h:2852
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2891
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4170
const VPBlockBase * getEntry() const
Definition VPlan.h:4206
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4281
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4238
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4223
VPBlockBase * getExiting()
Definition VPlan.h:4219
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4268
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4211
const Type * getCanonicalIVType() const
Definition VPlan.h:4282
const VPBlockBase * getExiting() const
Definition VPlan.h:4218
VPBlockBase * getEntry()
Definition VPlan.h:4207
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4276
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4231
friend class VPlan
Definition VPlan.h:4171
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4202
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2953
bool isSingleScalar() const
Definition VPlan.h:2994
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2961
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3006
bool isPredicated() const
Definition VPlan.h:2996
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2975
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2999
unsigned getOpcode() const
Definition VPlan.h:3023
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3018
VPValue * getStepValue() const
Definition VPlan.h:3848
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3842
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3813
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3834
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3825
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3806
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3851
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:531
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:537
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:595
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:541
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:598
static bool classof(const VPUser *U)
Definition VPlan.h:587
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:533
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:970
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:202
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1419
operand_range operands()
Definition VPlanValue.h:270
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:246
unsigned getNumOperands() const
Definition VPlanValue.h:240
operand_iterator op_end()
Definition VPlanValue.h:268
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:241
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:221
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:264
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:263
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
friend class VPExpressionRecipe
Definition VPlanValue.h:51
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:181
friend class VPDef
Definition VPlanValue.h:47
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:83
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:191
unsigned getNumUsers() const
Definition VPlanValue.h:111
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:176
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1938
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1959
const VPValue * getVFValue() const
Definition VPlan.h:1934
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1952
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1945
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1923
Type * getSourceElementType() const
Definition VPlan.h:1993
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1995
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2002
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1980
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2018
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2009
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1706
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1713
const_operand_range args() const
Definition VPlan.h:1746
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1727
operand_range args()
Definition VPlan.h:1745
Function * getCalledScalarFunction() const
Definition VPlan.h:1741
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3715
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3702
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3697
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1556
Instruction::CastOps getOpcode() const
Definition VPlan.h:1592
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1595
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1564
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1577
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1886
Type * getSourceElementType() const
Definition VPlan.h:1891
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1894
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1878
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1864
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2185
static bool classof(const VPValue *V)
Definition VPlan.h:2139
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2155
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2170
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2163
PHINode * getPHINode() const
Definition VPlan.h:2165
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2127
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2151
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2168
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2177
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2134
const VPValue * getVFValue() const
Definition VPlan.h:2158
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2144
const VPValue * getStepValue() const
Definition VPlan.h:2152
const TruncInst * getTruncInst() const
Definition VPlan.h:2259
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2240
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2215
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2232
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2258
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2206
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2275
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2254
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2267
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1606
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1637
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1677
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1686
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1623
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1692
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1658
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1689
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1680
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3278
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3275
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3318
static bool classof(const VPUser *U)
Definition VPlan.h:3312
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3341
Instruction & Ingredient
Definition VPlan.h:3266
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3301
Instruction & getIngredient() const
Definition VPlan.h:3349
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3272
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3305
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3332
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3269
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3328
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3288
void setMask(VPValue *Mask)
Definition VPlan.h:3280
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3338
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3325
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3322
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2369
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2340
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2347
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2302
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2311
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2292
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1516
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1530
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1520
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1545
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4300
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1105
friend class VPSlotTracker
Definition VPlan.h:4302
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1081
bool hasVF(ElementCount VF) const
Definition VPlan.h:4501
LLVMContext & getContext() const
Definition VPlan.h:4489
VPBasicBlock * getEntry()
Definition VPlan.h:4389
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4480
void setName(const Twine &newName)
Definition VPlan.h:4539
bool hasScalableVF() const
Definition VPlan.h:4502
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4487
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4483
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4451
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4557
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4472
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4508
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:889
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:867
const VPValue & getVF() const
Definition VPlan.h:4484
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:897
const VPBasicBlock * getEntry() const
Definition VPlan.h:4390
friend class VPlanPrinter
Definition VPlan.h:4301
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4574
unsigned getUF() const
Definition VPlan.h:4521
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4627
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1219
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4582
bool hasUF(unsigned UF) const
Definition VPlan.h:4519
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4441
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4563
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4477
void setVF(ElementCount VF)
Definition VPlan.h:4495
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4534
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1010
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4649
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:992
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4427
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not, use resetTripCount().
Definition VPlan.h:4458
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4465
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4414
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4378
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4605
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1225
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4560
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4543
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4615
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1111
bool hasScalarVFOnly() const
Definition VPlan.h:4512
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4432
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:904
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1064
void addVF(ElementCount VF)
Definition VPlan.h:4493
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4437
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4579
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4568
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1026
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4394
void setUF(unsigned UF)
Definition VPlan.h:4526
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4659
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1152
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4371
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2456
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:3914
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2423
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2503
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:301
constexpr bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1975
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
constexpr bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1748
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:1982
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1779
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1918
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2421
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:3947
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3961
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3966
static bool isPossible(SrcTy R)
Definition VPlan.h:3948
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:3876
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3897
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3878
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3881
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3868
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2415
Possible variants of a reduction.
Definition VPlan.h:2413
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2418
unsigned VFScaleFactor
Definition VPlan.h:2419
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2384
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2396
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2376
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:640
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:645
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:635
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:628
PHINode & getIRPhi()
Definition VPlan.h:1497
VPIRPhi(PHINode &PN)
Definition VPlan.h:1490
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1492
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1508
static bool classof(const VPUser *U)
Definition VPlan.h:1384
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1399
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1414
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1381
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1394
static bool classof(const VPValue *V)
Definition VPlan.h:1389
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:923
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:929
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:924
static bool classof(const VPValue *V)
Definition VPlan.h:949
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:956
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:944
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3396
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3409
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3397
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3419
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3355
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3377
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3356
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3365
A recipe for widening select instructions.
Definition VPlan.h:1805
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1816
VPWidenSelectRecipe(SelectInst *SI, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL={})
Definition VPlan.h:1806
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1837
VPValue * getCond() const
Definition VPlan.h:1832
unsigned getOpcode() const
Definition VPlan.h:1830
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3480
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3492
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3481
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3505
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3495
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3437
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3455
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3446
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3461
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3438