LLVM 22.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/ADT/ilist.h"
33#include "llvm/ADT/ilist_node.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/FMF.h"
38#include "llvm/IR/Operator.h"
41#include <cassert>
42#include <cstddef>
43#include <functional>
44#include <string>
45#include <utility>
46
47namespace llvm {
48
49class BasicBlock;
50class DominatorTree;
52class IRBuilderBase;
53struct VPTransformState;
54class raw_ostream;
56class SCEV;
57class Type;
58class VPBasicBlock;
59class VPBuilder;
60class VPDominatorTree;
61class VPRegionBlock;
62class VPlan;
63class VPLane;
65class VPlanSlp;
66class Value;
68class LoopVersioning;
69
70struct VPCostContext;
71
72namespace Intrinsic {
73typedef unsigned ID;
74}
75
76using VPlanPtr = std::unique_ptr<VPlan>;
77
78/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
79/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
81 friend class VPBlockUtils;
82
83 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
84
85 /// An optional name for the block.
86 std::string Name;
87
88 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
89 /// it is a topmost VPBlockBase.
90 VPRegionBlock *Parent = nullptr;
91
92 /// List of predecessor blocks.
94
95 /// List of successor blocks.
97
98 /// VPlan containing the block. Can only be set on the entry block of the
99 /// plan.
100 VPlan *Plan = nullptr;
101
102 /// Add \p Successor as the last successor to this block.
103 void appendSuccessor(VPBlockBase *Successor) {
104 assert(Successor && "Cannot add nullptr successor!");
105 Successors.push_back(Successor);
106 }
107
108 /// Add \p Predecessor as the last predecessor to this block.
109 void appendPredecessor(VPBlockBase *Predecessor) {
110 assert(Predecessor && "Cannot add nullptr predecessor!");
111 Predecessors.push_back(Predecessor);
112 }
113
114 /// Remove \p Predecessor, which must currently be present, from the predecessors of this block.
115 void removePredecessor(VPBlockBase *Predecessor) {
116 auto Pos = find(Predecessors, Predecessor); // yields end() when \p Predecessor is absent
117 assert(Pos != Predecessors.end() && "Predecessor does not exist");
118 Predecessors.erase(Pos);
119 }
120
121 /// Remove \p Successor, which must currently be present, from the successors of this block.
122 void removeSuccessor(VPBlockBase *Successor) {
123 auto Pos = find(Successors, Successor); // yields end() when \p Successor is absent
124 assert(Pos != Successors.end() && "Successor does not exist");
125 Successors.erase(Pos);
126 }
127
128 /// Replace \p Old with \p New in the predecessor list, keeping its position;
129 /// useful when swapping an old block in the CFG for a new one.
130 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
131 auto I = find(Predecessors, Old);
132 assert(I != Predecessors.end() && "Old must be a predecessor of this block");
133 assert(Old->getParent() == New->getParent() &&
134 "replaced predecessor must have the same parent");
135 *I = New; // overwrite in place so the slot of \p Old is reused for \p New
136 }
137
138 /// Replace \p Old with \p New in the successor list, keeping its position;
139 /// useful when swapping an old block in the CFG for a new one.
140 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
141 auto I = find(Successors, Old);
142 assert(I != Successors.end() && "Old must be a successor of this block");
143 assert(Old->getParent() == New->getParent() &&
144 "replaced successor must have the same parent");
145 *I = New; // overwrite in place so the slot of \p Old is reused for \p New
146 }
147
148protected:
149 VPBlockBase(const unsigned char SC, const std::string &N)
150 : SubclassID(SC), Name(N) {}
151
152public:
153 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
154 /// that are actually instantiated. Values of this enumeration are kept in the
155 /// SubclassID field of the VPBlockBase objects. They are used for concrete
156 /// type identification.
157 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
158
160
161 virtual ~VPBlockBase() = default;
162
163 const std::string &getName() const { return Name; }
164
165 void setName(const Twine &newName) { Name = newName.str(); }
166
167 /// \return an ID for the concrete type of this object.
168 /// This is used to implement the classof checks. This should not be used
169 /// for any other purpose, as the values may change as LLVM evolves.
170 unsigned getVPBlockID() const { return SubclassID; }
171
172 VPRegionBlock *getParent() { return Parent; }
173 const VPRegionBlock *getParent() const { return Parent; }
174
175 /// \return A pointer to the plan containing the current block.
176 VPlan *getPlan();
177 const VPlan *getPlan() const;
178
179 /// Sets the pointer of the plan containing the block. The block must be the
180 /// entry block into the VPlan.
181 void setPlan(VPlan *ParentPlan);
182
183 void setParent(VPRegionBlock *P) { Parent = P; }
184
185 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
186 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
187 /// VPBlockBase is a VPBasicBlock, it is returned.
188 const VPBasicBlock *getEntryBasicBlock() const;
189 VPBasicBlock *getEntryBasicBlock();
190
191 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
192 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
193 /// VPBlockBase is a VPBasicBlock, it is returned.
194 const VPBasicBlock *getExitingBasicBlock() const;
195 VPBasicBlock *getExitingBasicBlock();
196
197 const VPBlocksTy &getSuccessors() const { return Successors; }
198 VPBlocksTy &getSuccessors() { return Successors; }
199
202
203 const VPBlocksTy &getPredecessors() const { return Predecessors; }
204 VPBlocksTy &getPredecessors() { return Predecessors; }
205
206 /// \return the successor of this VPBlockBase if it has a single successor.
207 /// Otherwise return a null pointer.
209 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
210 }
211
212 /// \return the predecessor of this VPBlockBase if it has a single
213 /// predecessor. Otherwise return a null pointer.
215 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
216 }
217
218 size_t getNumSuccessors() const { return Successors.size(); }
219 size_t getNumPredecessors() const { return Predecessors.size(); }
220
221 /// Returns true if this block has any predecessors.
222 bool hasPredecessors() const { return !Predecessors.empty(); }
223
224 /// An Enclosing Block of a block B is any block containing B, including B
225 /// itself. \return the closest enclosing block starting from "this", which
226 /// has successors. \return the root enclosing block if all enclosing blocks
227 /// have no successors.
228 VPBlockBase *getEnclosingBlockWithSuccessors();
229
230 /// \return the closest enclosing block starting from "this", which has
231 /// predecessors. \return the root enclosing block if all enclosing blocks
232 /// have no predecessors.
233 VPBlockBase *getEnclosingBlockWithPredecessors();
234
235 /// \return the successors either attached directly to this VPBlockBase or, if
236 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
237 /// successors of its own, search recursively for the first enclosing
238 /// VPRegionBlock that has successors and return them. If no such
239 /// VPRegionBlock exists, return the (empty) successors of the topmost
240 /// VPBlockBase reached.
242 return getEnclosingBlockWithSuccessors()->getSuccessors();
243 }
244
245 /// \return the hierarchical successor of this VPBlockBase if it has a single
246 /// hierarchical successor. Otherwise return a null pointer.
248 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
249 }
250
251 /// \return the predecessors either attached directly to this VPBlockBase or,
252 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
253 /// predecessors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has predecessors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithPredecessors()->getPredecessors();
259 }
260
261 /// \return the hierarchical predecessor of this VPBlockBase if it has a
262 /// single hierarchical predecessor. Otherwise return a null pointer.
266
267 /// Set a given VPBlockBase \p Successor as the single successor of this
268 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
269 /// This VPBlockBase must have no successors.
271 assert(Successors.empty() && "Setting one successor when others exist.");
272 assert(Successor->getParent() == getParent() &&
273 "connected blocks must have the same parent");
274 appendSuccessor(Successor);
275 }
276
277 /// Install \p IfTrue and \p IfFalse, in that order, as the two successors
278 /// of this VPBlockBase, which must not have any successors yet. Only this
279 /// block's successor list is updated: it is not added as a predecessor of
280 /// \p IfTrue or \p IfFalse.
281 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
282 assert(Successors.empty() && "Setting two successors when others exist.");
283 for (VPBlockBase *Succ : {IfTrue, IfFalse})
284 appendSuccessor(Succ);
285 }
286
287 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
288 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
289 /// as successor of any VPBasicBlock in \p NewPreds.
291 assert(Predecessors.empty() && "Block predecessors already set.");
292 for (auto *Pred : NewPreds)
293 appendPredecessor(Pred);
294 }
295
296 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
297 /// This VPBlockBase must have no successors. This VPBlockBase is not added
298 /// as predecessor of any VPBasicBlock in \p NewSuccs.
300 assert(Successors.empty() && "Block successors already set.");
301 for (auto *Succ : NewSuccs)
302 appendSuccessor(Succ);
303 }
304
305 /// Remove all the predecessors of this block.
306 void clearPredecessors() { Predecessors.clear(); }
307
308 /// Remove all the successors of this block.
309 void clearSuccessors() { Successors.clear(); }
310
311 /// Swap predecessors of the block. The block must have exactly 2
312 /// predecessors.
314 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
315 std::swap(Predecessors[0], Predecessors[1]);
316 }
317
318 /// Swap successors of the block. The block must have exactly 2 successors.
319 // TODO: This should be part of introducing conditional branch recipes rather
320 // than being independent.
322 assert(Successors.size() == 2 && "must have 2 successors to swap");
323 std::swap(Successors[0], Successors[1]);
324 }
325
326 /// Returns the position of \p Pred in this block's predecessor list; \p Pred must occur there exactly once.
327 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
328 assert(count(Predecessors, Pred) == 1 && "must have Pred exactly once in Predecessors");
329 const auto PredIt = find(Predecessors, Pred);
330 return std::distance(Predecessors.begin(), PredIt);
331 }
332
333 /// Returns the position of \p Succ in this block's successor list; \p Succ must occur there exactly once.
334 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
335 assert(count(Successors, Succ) == 1 && "must have Succ exactly once in Successors");
336 const auto SuccIt = find(Successors, Succ);
337 return std::distance(Successors.begin(), SuccIt);
338 }
339
340 /// The method which generates the output IR that corresponds to this
341 /// VPBlockBase, thereby "executing" the VPlan.
342 virtual void execute(VPTransformState *State) = 0;
343
344 /// Return the cost of the block.
346
347#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
348 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
349 OS << getName();
350 }
351
352 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
353 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
354 /// consecutive numbers.
355 ///
356 /// Note that the numbering is applied to the whole VPlan, so printing
357 /// individual blocks is consistent with the whole VPlan printing.
358 virtual void print(raw_ostream &O, const Twine &Indent,
359 VPSlotTracker &SlotTracker) const = 0;
360
361 /// Print plain-text dump of this VPBlockBase to \p O.
362 void print(raw_ostream &O) const;
363
364 /// Print the successors of this block to \p O, prefixing all lines with \p
365 /// Indent.
366 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
367
368 /// Dump this VPBlockBase to dbgs().
369 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
370#endif
371
372 /// Clone the current block and its recipes without updating the operands of
373 /// the cloned recipes, including all blocks in the single-entry single-exit
374 /// region for VPRegionBlocks.
375 virtual VPBlockBase *clone() = 0;
376};
377
378/// VPRecipeBase is a base class modeling a sequence of one or more output IR
379/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
380/// and is responsible for deleting its defined values. Single-value
381 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
382/// VPRecipeBase and VPValue separately.
384 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
385 public VPDef,
386 public VPUser {
387 friend VPBasicBlock;
388 friend class VPBlockUtils;
389
390 /// Each VPRecipe belongs to a single VPBasicBlock.
391 VPBasicBlock *Parent = nullptr;
392
393 /// The debug location for the recipe.
394 DebugLoc DL;
395
396public:
397 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
399 : VPDef(SC), VPUser(Operands), DL(DL) {}
400
401 ~VPRecipeBase() override = default;
402
403 /// Clone the current recipe.
404 virtual VPRecipeBase *clone() = 0;
405
406 /// \return the VPBasicBlock which this VPRecipe belongs to.
407 VPBasicBlock *getParent() { return Parent; }
408 const VPBasicBlock *getParent() const { return Parent; }
409
410 /// \return the VPRegionBlock which the recipe belongs to.
411 VPRegionBlock *getRegion();
412 const VPRegionBlock *getRegion() const;
413
414 /// The method which generates the output IR instructions that correspond to
415 /// this VPRecipe, thereby "executing" the VPlan.
416 virtual void execute(VPTransformState &State) = 0;
417
418 /// Return the cost of this recipe, taking into account if the cost
419 /// computation should be skipped and the ForceTargetInstructionCost flag.
420 /// Also takes care of printing the cost for debugging.
422
423 /// Insert an unlinked recipe into a basic block immediately before
424 /// the specified recipe.
425 void insertBefore(VPRecipeBase *InsertPos);
426 /// Insert an unlinked recipe into \p BB immediately before the insertion
427 /// point \p IP;
428 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
429
430 /// Insert an unlinked Recipe into a basic block immediately after
431 /// the specified Recipe.
432 void insertAfter(VPRecipeBase *InsertPos);
433
434 /// Unlink this recipe from its current VPBasicBlock and insert it into
435 /// the VPBasicBlock that MovePos lives in, right after MovePos.
436 void moveAfter(VPRecipeBase *MovePos);
437
438 /// Unlink this recipe and insert into BB before I.
439 ///
440 /// \pre I is a valid iterator into BB.
441 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
442
443 /// This method unlinks 'this' from the containing basic block, but does not
444 /// delete it.
445 void removeFromParent();
446
447 /// This method unlinks 'this' from the containing basic block and deletes it.
448 ///
449 /// \returns an iterator pointing to the element after the erased one
451
452 /// Method to support type inquiry through isa, cast, and dyn_cast.
453 static inline bool classof(const VPDef *D) {
454 // All VPDefs are also VPRecipeBases.
455 return true;
456 }
457
458 static inline bool classof(const VPUser *U) { return true; }
459
460 /// Returns true if the recipe may have side-effects.
461 bool mayHaveSideEffects() const;
462
463 /// Returns true for PHI-like recipes.
464 bool isPhi() const;
465
466 /// Returns true if the recipe may read from memory.
467 bool mayReadFromMemory() const;
468
469 /// Returns true if the recipe may write to memory.
470 bool mayWriteToMemory() const;
471
472 /// Returns true if the recipe may read from or write to memory.
473 bool mayReadOrWriteMemory() const {
475 }
476
477 /// Returns the debug location of the recipe.
478 DebugLoc getDebugLoc() const { return DL; }
479
480 /// Return true if the recipe is a scalar cast.
481 bool isScalarCast() const;
482
483 /// Set the recipe's debug location to \p NewDL.
484 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
485
486protected:
487 /// Compute the cost of this recipe either using a recipe's specialized
488 /// implementation or using the legacy cost model and the underlying
489 /// instructions.
490 virtual InstructionCost computeCost(ElementCount VF,
491 VPCostContext &Ctx) const;
492};
493
494// Helper macro to define the common classof overloads (taking a VPDef, VPValue, VPUser, VPRecipeBase or VPSingleDefRecipe) for a recipe class: each returns true when the recipe's VPDefID equals the VPDefID argument; the VPValue and VPUser overloads return false when no recipe is found.
495#define VP_CLASSOF_IMPL(VPDefID) \
496 static inline bool classof(const VPDef *D) { \
497 return D->getVPDefID() == VPDefID; \
498 } \
499 static inline bool classof(const VPValue *V) { \
500 auto *R = V->getDefiningRecipe(); \
501 return R && R->getVPDefID() == VPDefID; \
502 } \
503 static inline bool classof(const VPUser *U) { \
504 auto *R = dyn_cast<VPRecipeBase>(U); \
505 return R && R->getVPDefID() == VPDefID; \
506 } \
507 static inline bool classof(const VPRecipeBase *R) { \
508 return R->getVPDefID() == VPDefID; \
509 } \
510 static inline bool classof(const VPSingleDefRecipe *R) { \
511 return R->getVPDefID() == VPDefID; \
512 }
513
514/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
515/// more output IR that define a single result VPValue.
516/// Note that VPRecipeBase must be inherited from before VPValue.
517class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
518public:
519 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
521 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
522
523 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
525 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
526
527 static inline bool classof(const VPRecipeBase *R) { // Type inquiry: decides from R's VPDefID whether R is a VPSingleDefRecipe.
528 switch (R->getVPDefID()) {
529 case VPRecipeBase::VPDerivedIVSC:
530 case VPRecipeBase::VPEVLBasedIVPHISC:
531 case VPRecipeBase::VPExpandSCEVSC:
532 case VPRecipeBase::VPExpressionSC:
533 case VPRecipeBase::VPInstructionSC:
534 case VPRecipeBase::VPReductionEVLSC:
535 case VPRecipeBase::VPReductionSC:
536 case VPRecipeBase::VPReplicateSC:
537 case VPRecipeBase::VPScalarIVStepsSC:
538 case VPRecipeBase::VPVectorPointerSC:
539 case VPRecipeBase::VPVectorEndPointerSC:
540 case VPRecipeBase::VPWidenCallSC:
541 case VPRecipeBase::VPWidenCanonicalIVSC:
542 case VPRecipeBase::VPWidenCastSC:
543 case VPRecipeBase::VPWidenGEPSC:
544 case VPRecipeBase::VPWidenIntrinsicSC:
545 case VPRecipeBase::VPWidenSC:
546 case VPRecipeBase::VPWidenSelectSC:
547 case VPRecipeBase::VPBlendSC:
548 case VPRecipeBase::VPPredInstPHISC:
549 case VPRecipeBase::VPCanonicalIVPHISC:
550 case VPRecipeBase::VPActiveLaneMaskPHISC:
551 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
552 case VPRecipeBase::VPWidenPHISC:
553 case VPRecipeBase::VPWidenIntOrFpInductionSC:
554 case VPRecipeBase::VPWidenPointerInductionSC:
555 case VPRecipeBase::VPReductionPHISC:
556 case VPRecipeBase::VPPartialReductionSC:
557 return true; // every recipe kind listed above is treated as a VPSingleDefRecipe
558 case VPRecipeBase::VPBranchOnMaskSC:
559 case VPRecipeBase::VPInterleaveEVLSC:
560 case VPRecipeBase::VPInterleaveSC:
561 case VPRecipeBase::VPIRInstructionSC:
562 case VPRecipeBase::VPWidenLoadEVLSC:
563 case VPRecipeBase::VPWidenLoadSC:
564 case VPRecipeBase::VPWidenStoreEVLSC:
565 case VPRecipeBase::VPWidenStoreSC:
566 case VPRecipeBase::VPHistogramSC:
567 // TODO: Widened stores don't define a value, but widened loads do. Split
568 // the recipes to be able to make widened loads VPSingleDefRecipes.
569 return false; // the recipe kinds in this second list are not VPSingleDefRecipes
570 }
571 llvm_unreachable("Unhandled VPDefID"); // reached only for a VPDefID that appears in neither list
572 }
573
574 static inline bool classof(const VPUser *U) {
575 auto *R = dyn_cast<VPRecipeBase>(U);
576 return R && classof(R);
577 }
578
579 VPSingleDefRecipe *clone() override = 0;
580
581 /// Returns the underlying instruction.
588
589#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
590 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
591 LLVM_DUMP_METHOD void dump() const;
592#endif
593};
594
595/// Class to record and manage LLVM IR flags.
597 enum class OperationType : unsigned char {
598 Cmp,
599 OverflowingBinOp,
600 Trunc,
601 DisjointOp,
602 PossiblyExactOp,
603 GEPOp,
604 FPMathOp,
605 NonNegOp,
606 Other
607 };
608
609public:
610 struct WrapFlagsTy {
611 char HasNUW : 1;
612 char HasNSW : 1;
613
615 };
616
618 char HasNUW : 1;
619 char HasNSW : 1;
620
622 };
623
628
630 char NonNeg : 1;
631 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
632 };
633
634private:
635 struct ExactFlagsTy {
636 char IsExact : 1;
637 };
638 struct FastMathFlagsTy {
639 char AllowReassoc : 1;
640 char NoNaNs : 1;
641 char NoInfs : 1;
642 char NoSignedZeros : 1;
643 char AllowReciprocal : 1;
644 char AllowContract : 1;
645 char ApproxFunc : 1;
646
647 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
648 };
649
650 OperationType OpType;
651
652 union {
657 ExactFlagsTy ExactFlags;
660 FastMathFlagsTy FMFs;
661 unsigned AllFlags;
662 };
663
664public:
665 VPIRFlags() : OpType(OperationType::Other), AllFlags(0) {}
666
668 if (auto *Op = dyn_cast<CmpInst>(&I)) {
669 OpType = OperationType::Cmp;
670 CmpPredicate = Op->getPredicate();
671 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
672 OpType = OperationType::DisjointOp;
673 DisjointFlags.IsDisjoint = Op->isDisjoint();
674 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
675 OpType = OperationType::OverflowingBinOp;
676 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
677 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
678 OpType = OperationType::Trunc;
679 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
680 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
681 OpType = OperationType::PossiblyExactOp;
682 ExactFlags.IsExact = Op->isExact();
683 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
684 OpType = OperationType::GEPOp;
685 GEPFlags = GEP->getNoWrapFlags();
686 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
687 OpType = OperationType::NonNegOp;
688 NonNegFlags.NonNeg = PNNI->hasNonNeg();
689 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
690 OpType = OperationType::FPMathOp;
691 FMFs = Op->getFastMathFlags();
692 } else {
693 OpType = OperationType::Other;
694 AllFlags = 0;
695 }
696 }
697
699 : OpType(OperationType::Cmp), CmpPredicate(Pred) {}
700
702 : OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
703
705 : OpType(OperationType::Trunc), TruncFlags(TruncFlags) {}
706
707 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), FMFs(FMFs) {}
708
710 : OpType(OperationType::DisjointOp), DisjointFlags(DisjointFlags) {}
711
713 : OpType(OperationType::NonNegOp), NonNegFlags(NonNegFlags) {}
714
716 : OpType(OperationType::GEPOp), GEPFlags(GEPFlags) {}
717
719 OpType = Other.OpType;
720 AllFlags = Other.AllFlags;
721 }
722
723 /// Only keep flags also present in \p Other. \p Other must have the same
724 /// OpType as the current object.
725 void intersectFlags(const VPIRFlags &Other);
726
727 /// Drop all poison-generating flags.
729 // NOTE: This needs to be kept in-sync with
730 // Instruction::dropPoisonGeneratingFlags.
731 switch (OpType) {
732 case OperationType::OverflowingBinOp:
733 WrapFlags.HasNUW = false;
734 WrapFlags.HasNSW = false;
735 break;
736 case OperationType::Trunc:
737 TruncFlags.HasNUW = false;
738 TruncFlags.HasNSW = false;
739 break;
740 case OperationType::DisjointOp:
741 DisjointFlags.IsDisjoint = false;
742 break;
743 case OperationType::PossiblyExactOp:
744 ExactFlags.IsExact = false;
745 break;
746 case OperationType::GEPOp:
748 break;
749 case OperationType::FPMathOp:
750 FMFs.NoNaNs = false;
751 FMFs.NoInfs = false;
752 break;
753 case OperationType::NonNegOp:
754 NonNegFlags.NonNeg = false;
755 break;
756 case OperationType::Cmp:
757 case OperationType::Other:
758 break;
759 }
760 }
761
762 /// Transfer the flags recorded in this object onto the IR instruction \p I.
763 void applyFlags(Instruction &I) const {
764 switch (OpType) {
765 case OperationType::Cmp:
766 case OperationType::Other:
767 return; // nothing is written to \p I for these two operation types
768 case OperationType::NonNegOp:
769 I.setNonNeg(NonNegFlags.NonNeg);
770 return;
771 case OperationType::PossiblyExactOp:
772 I.setIsExact(ExactFlags.IsExact);
773 return;
774 case OperationType::DisjointOp:
775 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
776 return;
777 case OperationType::GEPOp:
778 cast<GetElementPtrInst>(&I)->setNoWrapFlags(GEPFlags);
779 return;
780 case OperationType::OverflowingBinOp:
781 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
782 I.setHasNoSignedWrap(WrapFlags.HasNSW);
783 return;
784 case OperationType::Trunc:
785 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
786 I.setHasNoSignedWrap(TruncFlags.HasNSW);
787 return;
788 case OperationType::FPMathOp:
789 I.setHasAllowReassoc(FMFs.AllowReassoc);
790 I.setHasNoNaNs(FMFs.NoNaNs);
791 I.setHasNoInfs(FMFs.NoInfs);
792 I.setHasNoSignedZeros(FMFs.NoSignedZeros);
793 I.setHasAllowReciprocal(FMFs.AllowReciprocal);
794 I.setHasAllowContract(FMFs.AllowContract);
795 I.setHasApproxFunc(FMFs.ApproxFunc);
796 return;
797 }
798 }
799
801 assert(OpType == OperationType::Cmp &&
802 "recipe doesn't have a compare predicate");
803 return CmpPredicate;
804 }
805
807 assert(OpType == OperationType::Cmp &&
808 "recipe doesn't have a compare predicate");
809 CmpPredicate = Pred;
810 }
811
813
814 /// Returns true if the recipe has a comparison predicate.
815 bool hasPredicate() const { return OpType == OperationType::Cmp; }
816
817 /// Returns true if the recipe has fast-math flags.
818 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
819
821
822 /// Returns true if the recipe has non-negative flag.
823 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
824
825 bool isNonNeg() const {
826 assert(OpType == OperationType::NonNegOp &&
827 "recipe doesn't have a NNEG flag");
828 return NonNegFlags.NonNeg;
829 }
830
831 bool hasNoUnsignedWrap() const {
832 // Report the no-unsigned-wrap bit. Only overflowing binary operators and
833 // truncs record one; asking any other operation type is a caller error.
834 if (OpType == OperationType::OverflowingBinOp)
835 return WrapFlags.HasNUW;
836 if (OpType == OperationType::Trunc)
837 return TruncFlags.HasNUW;
838 llvm_unreachable("recipe doesn't have a NUW flag");
839 }
841
842 bool hasNoSignedWrap() const {
843 // Report the no-signed-wrap bit. Only overflowing binary operators and
844 // truncs record one; asking any other operation type is a caller error.
845 if (OpType == OperationType::OverflowingBinOp)
846 return WrapFlags.HasNSW;
847 if (OpType == OperationType::Trunc)
848 return TruncFlags.HasNSW;
849 llvm_unreachable("recipe doesn't have a NSW flag");
850 }
852
853 bool isDisjoint() const { // Returns the recorded disjoint flag; only valid when OpType is DisjointOp.
854 assert(OpType == OperationType::DisjointOp &&
855 "recipe cannot have a disjoint flag");
856 return DisjointFlags.IsDisjoint;
857 }
858
859#if !defined(NDEBUG)
860 /// Returns true if the set flags are valid for \p Opcode.
861 bool flagsValidForOpcode(unsigned Opcode) const;
862#endif
863
864#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
865 void printFlags(raw_ostream &O) const;
866#endif
867};
868
869/// A pure-virtual common base class for recipes defining a single VPValue and
870/// using IR flags.
872 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
874 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags() {}
875
876 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
877 Instruction &I)
878 : VPSingleDefRecipe(SC, Operands, &I, I.getDebugLoc()), VPIRFlags(I) {}
879
880 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
881 const VPIRFlags &Flags,
883 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
884
885 static inline bool classof(const VPRecipeBase *R) { // Type inquiry: true for the recipe kinds enumerated below.
886 const auto ID = R->getVPDefID();
887 return ID == VPRecipeBase::VPInstructionSC || ID == VPRecipeBase::VPWidenSC ||
888 ID == VPRecipeBase::VPWidenGEPSC ||
889 ID == VPRecipeBase::VPWidenCallSC ||
890 ID == VPRecipeBase::VPWidenCastSC ||
891 ID == VPRecipeBase::VPWidenIntrinsicSC ||
892 ID == VPRecipeBase::VPWidenSelectSC ||
893 ID == VPRecipeBase::VPReductionSC ||
894 ID == VPRecipeBase::VPReductionEVLSC ||
895 ID == VPRecipeBase::VPReplicateSC ||
896 ID == VPRecipeBase::VPVectorEndPointerSC ||
897 ID == VPRecipeBase::VPVectorPointerSC;
898 }
899
900 static inline bool classof(const VPUser *U) {
901 auto *R = dyn_cast<VPRecipeBase>(U);
902 return R && classof(R);
903 }
904
905 static inline bool classof(const VPValue *V) {
906 auto *R = dyn_cast_or_null<VPRecipeBase>(V->getDefiningRecipe());
907 return R && classof(R);
908 }
909
910 VPRecipeWithIRFlags *clone() override = 0;
911
912 static inline bool classof(const VPSingleDefRecipe *U) {
913 auto *R = dyn_cast<VPRecipeBase>(U);
914 return R && classof(R);
915 }
916
917 void execute(VPTransformState &State) override = 0;
918
919 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
921 VPCostContext &Ctx) const;
922};
923
924/// Helper to access the operand that contains the unroll part for this recipe
925/// after unrolling.
926template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
927protected:
928 /// Return the VPValue operand containing the unroll part or null if there is
929 /// no such operand.
930 VPValue *getUnrollPartOperand(const VPUser &U) const;
931
932 /// Return the unroll part.
933 unsigned getUnrollPart(const VPUser &U) const;
934};
935
936/// Helper to manage IR metadata for recipes. It filters out metadata that
937/// cannot be propagated.
940
941public:
942 VPIRMetadata() = default;
943
944 /// Adds metadata that can be preserved from the original instruction
945 /// \p I.
947
948 /// Adds metadata that can be preserved from the original instruction
949 /// \p I and noalias metadata guaranteed by runtime checks using \p LVer.
951
952 /// Copy constructor for cloning.
953 VPIRMetadata(const VPIRMetadata &Other) = default;
954
956
957 /// Add all metadata to \p I.
958 void applyMetadata(Instruction &I) const;
959
960 /// Append the metadata node \p Node under kind \p Kind; \p Kind must not already have an entry.
961 void addMetadata(unsigned Kind, MDNode *Node) {
962 [[maybe_unused]] const auto HasSameKind = [Kind](const std::pair<unsigned, MDNode *> &Entry) {
963 return Entry.first == Kind;
964 };
965 assert(none_of(Metadata, HasSameKind) &&
966 "Kind must appear at most once in Metadata");
967 Metadata.emplace_back(Kind, Node);
968 }
969
970 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
971 /// nodes that are common to both.
972 void intersect(const VPIRMetadata &MD);
973};
974
975/// This is a concrete Recipe that models a single VPlan-level instruction.
976/// While as any Recipe it may generate a sequence of IR instructions when
977/// executed, these instructions would always form a single-def expression as
978/// the VPInstruction is also a single def-use vertex.
980 public VPIRMetadata,
981 public VPUnrollPartAccessor<1> {
982 friend class VPlanSlp;
983
984public:
985 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
986 enum {
988 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
989 // values of a first-order recurrence.
993 // Creates a mask where each lane is active (true) whilst the current
994 // counter (first operand + index) is less than the second operand. i.e.
995 // mask[i] = icmp ult (op0 + i), op1
996 // The size of the mask returned is VF * Multiplier (UF, third op).
1000 // Increment the canonical IV separately for each unrolled part.
1005 /// Given operands of (the same) struct type, creates a struct of fixed-
1006 /// width vectors each containing a struct field of all operands. The
1007 /// number of operands matches the element count of every vector.
1009 /// Creates a fixed-width vector containing all operands. The number of
1010 /// operands matches the vector element count.
1012 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1013 /// abstract VPInstruction whose single defined VPValue represents VF
1014 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1015 /// VPInstructions.
1017 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1018 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1022 // Extracts the last lane from its operand if it is a vector, or the last
1023 // part if scalar. In the latter case, the recipe will be removed during
1024 // unrolling.
1026 // Extracts the last lane for each part from its operand.
1028 // Extracts the second-to-last lane from its operand or the second-to-last
1029 // part if it is scalar. In the latter case, the recipe will be removed
1030 // during unrolling.
1032 LogicalAnd, // Non-poison propagating logical And.
1033 // Add an offset in bytes (second operand) to a base pointer (first
1034 // operand). Only generates scalar values (either for the first lane only or
1035 // for all lanes, depending on its uses).
1037 // Add a vector offset in bytes (second operand) to a scalar base pointer
1038 // (first operand).
1040 // Returns a scalar boolean value, which is true if any lane of its
1041 // (boolean) vector operands is true. It produces the reduced value across
1042 // all unrolled iterations. Unrolling will add all copies of its original
1043 // operand as additional operands. AnyOf is poison-safe as all operands
1044 // will be frozen.
1046 // Calculates the first active lane index of the vector predicate operands.
1047 // It produces the lane index across all unrolled iterations. Unrolling will
1048 // add all copies of its original operand as additional operands.
1050
1051 // The opcodes below are used for VPInstructionWithType.
1052 //
1053 /// Scale the first operand (vector step) by the second operand
1054 /// (scalar-step). Casts both operands to the result type if needed.
1056 /// Start vector for reductions with 3 operands: the original start value,
1057 /// the identity value for the reduction and an integer indicating the
1058 /// scaling factor.
1060 // Creates a step vector starting from 0 to VF with a step of 1.
1062 /// Extracts a single lane (first operand) from a set of vector operands.
1063 /// The lane specifies an index into a vector formed by combining all vector
1064 /// operands (all operands after the first one).
1066 /// Explicit user for the resume phi of the canonical induction in the main
1067 /// VPlan, used by the epilogue vector loop.
1069 /// Returns the value for vscale.
1072 };
1073
1074 /// Returns true if this VPInstruction generates scalar values for all lanes.
1075 /// Most VPInstructions generate a single value per part, either vector or
1076 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1077 /// values per all lanes, stemming from an original ingredient. This method
1078 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1079 /// underlying ingredient.
1080 bool doesGeneratePerAllLanes() const;
1081
1082private:
1083 typedef unsigned char OpcodeTy;
1084 OpcodeTy Opcode;
1085
1086 /// An optional name that can be used for the generated IR instruction.
1087 const std::string Name;
1088
1089 /// Returns true if we can generate a scalar for the first lane only if
1090 /// needed.
1091 bool canGenerateScalarForFirstLane() const;
1092
1093 /// Utility methods serving execute(): generates a single vector instance of
1094 /// the modeled instruction. \returns the generated value. In some cases an
1095 /// existing value is returned rather than a generated one.
1096 Value *generate(VPTransformState &State);
1097
1098#if !defined(NDEBUG)
1099 /// Return the number of operands determined by the opcode of the
1100 /// VPInstruction. Returns -1u if the number of operands cannot be determined
1101 /// directly by the opcode.
1102 static unsigned getNumOperandsForOpcode(unsigned Opcode);
1103#endif
1104
1105public:
1106 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1107 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
1108 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1109 VPIRMetadata(), Opcode(Opcode), Name(Name.str()) {}
1110
1111 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1112 const VPIRFlags &Flags, const VPIRMetadata &MD = {},
1113 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1114
1115 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1116
1117 VPInstruction *clone() override {
1118 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1119 getDebugLoc(), Name);
1120 if (getUnderlyingValue())
1121 New->setUnderlyingValue(getUnderlyingInstr());
1122 return New;
1123 }
1124
1125 unsigned getOpcode() const { return Opcode; }
1126
1127 /// Generate the instruction.
1128 /// TODO: We currently execute only per-part unless a specific instance is
1129 /// provided.
1130 void execute(VPTransformState &State) override;
1131
1132 /// Return the cost of this VPInstruction.
1133 InstructionCost computeCost(ElementCount VF,
1134 VPCostContext &Ctx) const override;
1135
1136#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1137 /// Print the VPInstruction to \p O.
1138 void print(raw_ostream &O, const Twine &Indent,
1139 VPSlotTracker &SlotTracker) const override;
1140
1141 /// Print the VPInstruction to dbgs() (for debugging).
1142 LLVM_DUMP_METHOD void dump() const;
1143#endif
1144
1145 bool hasResult() const {
1146 // CallInst may or may not have a result, depending on the called function.
1147 // Conservatively return calls have results for now.
1148 switch (getOpcode()) {
1149 case Instruction::Ret:
1150 case Instruction::Br:
1151 case Instruction::Store:
1152 case Instruction::Switch:
1153 case Instruction::IndirectBr:
1154 case Instruction::Resume:
1155 case Instruction::CatchRet:
1156 case Instruction::Unreachable:
1157 case Instruction::Fence:
1158 case Instruction::AtomicRMW:
1161 return false;
1162 default:
1163 return true;
1164 }
1165 }
1166
1167 /// Returns true if the underlying opcode may read from or write to memory.
1168 bool opcodeMayReadOrWriteFromMemory() const;
1169
1170 /// Returns true if the recipe only uses the first lane of operand \p Op.
1171 bool usesFirstLaneOnly(const VPValue *Op) const override;
1172
1173 /// Returns true if the recipe only uses the first part of operand \p Op.
1174 bool usesFirstPartOnly(const VPValue *Op) const override;
1175
1176 /// Returns true if this VPInstruction produces a scalar value from a vector,
1177 /// e.g. by performing a reduction or extracting a lane.
1178 bool isVectorToScalar() const;
1179
1180 /// Returns true if this VPInstruction's operands are single scalars and the
1181 /// result is also a single scalar.
1182 bool isSingleScalar() const;
1183
1184 /// Returns the symbolic name assigned to the VPInstruction.
1185 StringRef getName() const { return Name; }
1186};
1187
1188/// A specialization of VPInstruction augmenting it with a dedicated result
1189/// type, to be used when the opcode and operands of the VPInstruction don't
1190/// directly determine the result type. Note that there is no separate VPDef ID
1191/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1192/// distinguished purely by the opcode.
1194 /// Scalar result type produced by the recipe.
1195 Type *ResultTy;
1196
1197public:
1199 Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
1200 const Twine &Name = "")
1201 : VPInstruction(Opcode, Operands, Flags, {}, DL, Name),
1202 ResultTy(ResultTy) {}
1203
1205 Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags,
1206 const VPIRMetadata &Metadata, const Twine &Name = "")
1207 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1208 ResultTy(ResultTy) {}
1209
1210 static inline bool classof(const VPRecipeBase *R) {
1211 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1212 // type information.
1213 if (R->isScalarCast())
1214 return true;
1215 auto *VPI = dyn_cast<VPInstruction>(R);
1216 if (!VPI)
1217 return false;
1218 switch (VPI->getOpcode()) {
1222 return true;
1223 default:
1224 return false;
1225 }
1226 }
1227
1228 static inline bool classof(const VPUser *R) {
1230 }
1231
1232 VPInstruction *clone() override {
1233 auto *New =
1235 *this, getDebugLoc(), getName());
1236 New->setUnderlyingValue(getUnderlyingValue());
1237 return New;
1238 }
1239
1240 void execute(VPTransformState &State) override;
1241
1242 /// Return the cost of this VPInstruction.
1244 VPCostContext &Ctx) const override {
1245 // TODO: Compute accurate cost after retiring the legacy cost model.
1246 return 0;
1247 }
1248
1249 Type *getResultType() const { return ResultTy; }
1250
1251#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1252 /// Print the recipe.
1253 void print(raw_ostream &O, const Twine &Indent,
1254 VPSlotTracker &SlotTracker) const override;
1255#endif
1256};
1257
1258/// Helper type to provide functions to access incoming values and blocks for
1259/// phi-like recipes.
1261protected:
1262 /// Return a VPRecipeBase* to the current object.
1263 virtual const VPRecipeBase *getAsRecipe() const = 0;
1264
1265public:
1266 virtual ~VPPhiAccessors() = default;
1267
1268 /// Returns the incoming VPValue with index \p Idx.
1269 VPValue *getIncomingValue(unsigned Idx) const {
1270 return getAsRecipe()->getOperand(Idx);
1271 }
1272
1273 /// Returns the incoming block with index \p Idx.
1274 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1275
1276 /// Returns the number of incoming values, also number of incoming blocks.
1277 virtual unsigned getNumIncoming() const {
1278 return getAsRecipe()->getNumOperands();
1279 }
1280
1281 /// Returns an iterator range over the incoming values.
1283 return make_range(getAsRecipe()->op_begin(),
1284 getAsRecipe()->op_begin() + getNumIncoming());
1285 }
1286
1288 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1289
1290 /// Returns an iterator range over the incoming blocks.
1292 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1293 return getIncomingBlock(Idx);
1294 };
1295 return map_range(index_range(0, getNumIncoming()), GetBlock);
1296 }
1297
1298 /// Returns an iterator range over pairs of incoming values and corresponding
1299 /// incoming blocks.
1305
1306 /// Removes the incoming value for \p IncomingBlock, which must be a
1307 /// predecessor.
1308 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1309
1310#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1311 /// Print the recipe.
1313#endif
1314};
1315
1317 VPPhi(ArrayRef<VPValue *> Operands, DebugLoc DL, const Twine &Name = "")
1318 : VPInstruction(Instruction::PHI, Operands, DL, Name) {}
1319
1320 static inline bool classof(const VPUser *U) {
1321 auto *VPI = dyn_cast<VPInstruction>(U);
1322 return VPI && VPI->getOpcode() == Instruction::PHI;
1323 }
1324
1325 static inline bool classof(const VPValue *V) {
1326 auto *VPI = dyn_cast<VPInstruction>(V);
1327 return VPI && VPI->getOpcode() == Instruction::PHI;
1328 }
1329
1330 static inline bool classof(const VPSingleDefRecipe *SDR) {
1331 auto *VPI = dyn_cast<VPInstruction>(SDR);
1332 return VPI && VPI->getOpcode() == Instruction::PHI;
1333 }
1334
1335 VPPhi *clone() override {
1336 auto *PhiR = new VPPhi(operands(), getDebugLoc(), getName());
1337 PhiR->setUnderlyingValue(getUnderlyingValue());
1338 return PhiR;
1339 }
1340
1341 void execute(VPTransformState &State) override;
1342
1343#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1344 /// Print the recipe.
1345 void print(raw_ostream &O, const Twine &Indent,
1346 VPSlotTracker &SlotTracker) const override;
1347#endif
1348
1349protected:
1350 const VPRecipeBase *getAsRecipe() const override { return this; }
1351};
1352
1353 /// A recipe to wrap an original IR instruction not to be modified during
1354 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1355 /// Except for PHIs, VPIRInstructions cannot have any operands.
1357 Instruction &I;
1358
1359protected:
1360 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1361 /// subclasses may need to be created, e.g. VPIRPhi.
1363 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1364
1365public:
1366 ~VPIRInstruction() override = default;
1367
1368 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1369 /// VPIRInstruction.
1371
1372 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1373
1375 auto *R = create(I);
1376 for (auto *Op : operands())
1377 R->addOperand(Op);
1378 return R;
1379 }
1380
1381 void execute(VPTransformState &State) override;
1382
1383 /// Return the cost of this VPIRInstruction.
1385 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1386
1387 Instruction &getInstruction() const { return I; }
1388
1389#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1390 /// Print the recipe.
1391 void print(raw_ostream &O, const Twine &Indent,
1392 VPSlotTracker &SlotTracker) const override;
1393#endif
1394
1395 bool usesScalars(const VPValue *Op) const override {
1397 "Op must be an operand of the recipe");
1398 return true;
1399 }
1400
1401 bool usesFirstPartOnly(const VPValue *Op) const override {
1403 "Op must be an operand of the recipe");
1404 return true;
1405 }
1406
1407 bool usesFirstLaneOnly(const VPValue *Op) const override {
1409 "Op must be an operand of the recipe");
1410 return true;
1411 }
1412
1413 /// Update the recipe's first operand to the last lane of the operand using \p
1414 /// Builder. Must only be used for VPIRInstructions with at least one operand
1415 /// wrapping a PHINode.
1417};
1418
1419/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1420/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1421/// allowed, and it is used to add a new incoming value for the single
1422/// predecessor VPBB.
1424 public VPPhiAccessors {
1426
1427 static inline bool classof(const VPRecipeBase *U) {
1428 auto *R = dyn_cast<VPIRInstruction>(U);
1429 return R && isa<PHINode>(R->getInstruction());
1430 }
1431
1433
1434 void execute(VPTransformState &State) override;
1435
1436#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1437 /// Print the recipe.
1438 void print(raw_ostream &O, const Twine &Indent,
1439 VPSlotTracker &SlotTracker) const override;
1440#endif
1441
1442protected:
1443 const VPRecipeBase *getAsRecipe() const override { return this; }
1444};
1445
1446/// VPWidenRecipe is a recipe for producing a widened instruction using the
1447/// opcode and operands of the recipe. This recipe covers most of the
1448/// traditional vectorization cases where each recipe transforms into a
1449/// vectorized version of itself.
1451 public VPIRMetadata {
1452 unsigned Opcode;
1453
1454public:
1455 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1456 const VPIRFlags &Flags, const VPIRMetadata &Metadata,
1457 DebugLoc DL)
1458 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, Flags, DL),
1459 VPIRMetadata(Metadata), Opcode(Opcode) {}
1460
1462 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPIRMetadata(I),
1463 Opcode(I.getOpcode()) {}
1464
1465 ~VPWidenRecipe() override = default;
1466
1467 VPWidenRecipe *clone() override {
1468 auto *R =
1469 new VPWidenRecipe(getOpcode(), operands(), *this, *this, getDebugLoc());
1470 R->setUnderlyingValue(getUnderlyingValue());
1471 return R;
1472 }
1473
1474 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1475
1476 /// Produce a widened instruction using the opcode and operands of the recipe,
1477 /// processing State.VF elements.
1478 void execute(VPTransformState &State) override;
1479
1480 /// Return the cost of this VPWidenRecipe.
1481 InstructionCost computeCost(ElementCount VF,
1482 VPCostContext &Ctx) const override;
1483
1484 unsigned getOpcode() const { return Opcode; }
1485
1486#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1487 /// Print the recipe.
1488 void print(raw_ostream &O, const Twine &Indent,
1489 VPSlotTracker &SlotTracker) const override;
1490#endif
1491};
1492
1493/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1495 /// Cast instruction opcode.
1496 Instruction::CastOps Opcode;
1497
1498 /// Result type for the cast.
1499 Type *ResultTy;
1500
1501public:
1503 CastInst &UI)
1504 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), VPIRMetadata(UI),
1505 Opcode(Opcode), ResultTy(ResultTy) {
1506 assert(UI.getOpcode() == Opcode &&
1507 "opcode of underlying cast doesn't match");
1508 }
1509
1511 const VPIRFlags &Flags = {},
1512 const VPIRMetadata &Metadata = {},
1514 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, Flags, DL),
1515 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1516 assert(flagsValidForOpcode(Opcode) &&
1517 "Set flags not supported for the provided opcode");
1518 }
1519
1520 ~VPWidenCastRecipe() override = default;
1521
1523 auto *New = new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy, *this,
1524 *this, getDebugLoc());
1525 if (auto *UV = getUnderlyingValue())
1526 New->setUnderlyingValue(UV);
1527 return New;
1528 }
1529
1530 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1531
1532 /// Produce widened copies of the cast.
1533 void execute(VPTransformState &State) override;
1534
1535 /// Return the cost of this VPWidenCastRecipe.
1537 VPCostContext &Ctx) const override;
1538
1539#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1540 /// Print the recipe.
1541 void print(raw_ostream &O, const Twine &Indent,
1542 VPSlotTracker &SlotTracker) const override;
1543#endif
1544
1545 Instruction::CastOps getOpcode() const { return Opcode; }
1546
1547 /// Returns the result type of the cast.
1548 Type *getResultType() const { return ResultTy; }
1549};
1550
1551/// A recipe for widening vector intrinsics.
1553 /// ID of the vector intrinsic to widen.
1554 Intrinsic::ID VectorIntrinsicID;
1555
1556 /// Scalar return type of the intrinsic.
1557 Type *ResultTy;
1558
1559 /// True if the intrinsic may read from memory.
1560 bool MayReadFromMemory;
1561
1562 /// True if the intrinsic may write to memory.
1563 bool MayWriteToMemory;
1564
1565 /// True if the intrinsic may have side-effects.
1566 bool MayHaveSideEffects;
1567
1568public:
1570 ArrayRef<VPValue *> CallArguments, Type *Ty,
1572 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1573 VPIRMetadata(CI), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1574 MayReadFromMemory(CI.mayReadFromMemory()),
1575 MayWriteToMemory(CI.mayWriteToMemory()),
1576 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1577
1579 ArrayRef<VPValue *> CallArguments, Type *Ty,
1581 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1582 VPIRMetadata(), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1583 LLVMContext &Ctx = Ty->getContext();
1584 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1585 MemoryEffects ME = Attrs.getMemoryEffects();
1586 MayReadFromMemory = !ME.onlyWritesMemory();
1587 MayWriteToMemory = !ME.onlyReadsMemory();
1588 MayHaveSideEffects = MayWriteToMemory ||
1589 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1590 !Attrs.hasAttribute(Attribute::WillReturn);
1591 }
1592
1593 ~VPWidenIntrinsicRecipe() override = default;
1594
1596 if (Value *CI = getUnderlyingValue())
1597 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1598 operands(), ResultTy, getDebugLoc());
1599 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1600 getDebugLoc());
1601 }
1602
1603 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1604
1605 /// Produce a widened version of the vector intrinsic.
1606 void execute(VPTransformState &State) override;
1607
1608 /// Return the cost of this vector intrinsic.
1610 VPCostContext &Ctx) const override;
1611
1612 /// Return the ID of the intrinsic.
1613 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1614
1615 /// Return the scalar return type of the intrinsic.
1616 Type *getResultType() const { return ResultTy; }
1617
1618 /// Return the name of the intrinsic as a string.
1620
1621 /// Returns true if the intrinsic may read from memory.
1622 bool mayReadFromMemory() const { return MayReadFromMemory; }
1623
1624 /// Returns true if the intrinsic may write to memory.
1625 bool mayWriteToMemory() const { return MayWriteToMemory; }
1626
1627 /// Returns true if the intrinsic may have side-effects.
1628 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1629
1630#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1631 /// Print the recipe.
1632 void print(raw_ostream &O, const Twine &Indent,
1633 VPSlotTracker &SlotTracker) const override;
1634#endif
1635
1636 bool usesFirstLaneOnly(const VPValue *Op) const override;
1637};
1638
1639/// A recipe for widening Call instructions using library calls.
1641 public VPIRMetadata {
1642 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1643 /// between a given VF and the chosen vectorized variant, so there will be a
1644 /// different VPlan for each VF with a valid variant.
1645 Function *Variant;
1646
1647public:
1649 ArrayRef<VPValue *> CallArguments,
1651 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1652 *cast<Instruction>(UV)),
1653 VPIRMetadata(*cast<Instruction>(UV)), Variant(Variant) {
1654 assert(
1656 "last operand must be the called function");
1657 }
1658
1659 ~VPWidenCallRecipe() override = default;
1660
1662 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1663 getDebugLoc());
1664 }
1665
1666 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1667
1668 /// Produce a widened version of the call instruction.
1669 void execute(VPTransformState &State) override;
1670
1671 /// Return the cost of this VPWidenCallRecipe.
1672 InstructionCost computeCost(ElementCount VF,
1673 VPCostContext &Ctx) const override;
1674
1678
1681
1682#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1683 /// Print the recipe.
1684 void print(raw_ostream &O, const Twine &Indent,
1685 VPSlotTracker &SlotTracker) const override;
1686#endif
1687};
1688
1689/// A recipe representing a sequence of load -> update -> store as part of
1690/// a histogram operation. This means there may be aliasing between vector
1691/// lanes, which is handled by the llvm.experimental.vector.histogram family
1692/// of intrinsics. The only update operations currently supported are
1693/// 'add' and 'sub' where the other term is loop-invariant.
1695 /// Opcode of the update operation, currently either add or sub.
1696 unsigned Opcode;
1697
1698public:
1699 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1701 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1702
1703 ~VPHistogramRecipe() override = default;
1704
1706 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1707 }
1708
1709 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1710
1711 /// Produce a vectorized histogram operation.
1712 void execute(VPTransformState &State) override;
1713
1714 /// Return the cost of this VPHistogramRecipe.
1716 VPCostContext &Ctx) const override;
1717
1718 unsigned getOpcode() const { return Opcode; }
1719
1720 /// Return the mask operand if one was provided, or a null pointer if all
1721 /// lanes should be executed unconditionally.
1722 VPValue *getMask() const {
1723 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1724 }
1725
1726#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1727 /// Print the recipe
1728 void print(raw_ostream &O, const Twine &Indent,
1729 VPSlotTracker &SlotTracker) const override;
1730#endif
1731};
1732
1733/// A recipe for widening select instructions. Supports both wide vector and
1734/// single-scalar conditions, matching the behavior of LLVM IR's select
1735/// instruction.
1737 public VPIRMetadata {
1739 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I),
1740 VPIRMetadata(I) {}
1741
1742 ~VPWidenSelectRecipe() override = default;
1743
1748
1749 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1750
1751 /// Produce a widened version of the select instruction.
1752 void execute(VPTransformState &State) override;
1753
1754 /// Return the cost of this VPWidenSelectRecipe.
1755 InstructionCost computeCost(ElementCount VF,
1756 VPCostContext &Ctx) const override;
1757
1758#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1759 /// Print the recipe.
1760 void print(raw_ostream &O, const Twine &Indent,
1761 VPSlotTracker &SlotTracker) const override;
1762#endif
1763
1764 unsigned getOpcode() const { return Instruction::Select; }
1765
1766 VPValue *getCond() const {
1767 return getOperand(0);
1768 }
1769
1770 /// Returns true if the recipe only uses the first lane of operand \p Op.
1771 bool usesFirstLaneOnly(const VPValue *Op) const override {
1773 "Op must be an operand of the recipe");
1774 return Op == getCond() && Op->isDefinedOutsideLoopRegions();
1775 }
1776};
1777
1778/// A recipe for handling GEP instructions.
1780 Type *SourceElementTy;
1781
1782 bool isPointerLoopInvariant() const {
1783 return getOperand(0)->isDefinedOutsideLoopRegions();
1784 }
1785
1786 bool isIndexLoopInvariant(unsigned I) const {
1787 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
1788 }
1789
1790 bool areAllOperandsInvariant() const {
1791 return all_of(operands(), [](VPValue *Op) {
1792 return Op->isDefinedOutsideLoopRegions();
1793 });
1794 }
1795
1796public:
1798 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
1799 SourceElementTy(GEP->getSourceElementType()) {
1801 (void)Metadata;
1803 assert(Metadata.empty() && "unexpected metadata on GEP");
1804 }
1805
1806 ~VPWidenGEPRecipe() override = default;
1807
1812
1813 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1814
1815 /// This recipe generates a GEP instruction.
1816 unsigned getOpcode() const { return Instruction::GetElementPtr; }
1817
1818 /// Generate the gep nodes.
1819 void execute(VPTransformState &State) override;
1820
1821 Type *getSourceElementType() const { return SourceElementTy; }
1822
1823 /// Return the cost of this VPWidenGEPRecipe.
1825 VPCostContext &Ctx) const override {
1826 // TODO: Compute accurate cost after retiring the legacy cost model.
1827 return 0;
1828 }
1829
1830#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1831 /// Print the recipe.
1832 void print(raw_ostream &O, const Twine &Indent,
1833 VPSlotTracker &SlotTracker) const override;
1834#endif
1835
1836 /// Returns true if the recipe only uses the first lane of operand \p Op.
1837 bool usesFirstLaneOnly(const VPValue *Op) const override {
1839 "Op must be an operand of the recipe");
1840 if (Op == getOperand(0))
1841 return isPointerLoopInvariant();
1842 else
1843 return !isPointerLoopInvariant() && Op->isDefinedOutsideLoopRegions();
1844 }
1845};
1846
1847/// A recipe to compute a pointer to the last element of each part of a widened
1848/// memory access for widened memory accesses of IndexedTy. Used for
1849/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed.
1851 public VPUnrollPartAccessor<2> {
1852 Type *IndexedTy;
1853
1854 /// The constant stride of the pointer computed by this recipe, expressed in
1855 /// units of IndexedTy.
1856 int64_t Stride;
1857
1858public:
1860 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
1861 : VPRecipeWithIRFlags(VPDef::VPVectorEndPointerSC,
1862 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1863 IndexedTy(IndexedTy), Stride(Stride) {
1864 assert(Stride < 0 && "Stride must be negative");
1865 }
1866
1867 VP_CLASSOF_IMPL(VPDef::VPVectorEndPointerSC)
1868
1870 const VPValue *getVFValue() const { return getOperand(1); }
1871
1872 void execute(VPTransformState &State) override;
1873
1874 bool usesFirstLaneOnly(const VPValue *Op) const override {
1876 "Op must be an operand of the recipe");
1877 return true;
1878 }
1879
1880 /// Return the cost of this VPVectorEndPointerRecipe.
1882 VPCostContext &Ctx) const override {
1883 // TODO: Compute accurate cost after retiring the legacy cost model.
1884 return 0;
1885 }
1886
1887 /// Returns true if the recipe only uses the first part of operand \p Op.
1888 bool usesFirstPartOnly(const VPValue *Op) const override {
1890 "Op must be an operand of the recipe");
1891 assert(getNumOperands() <= 2 && "must have at most two operands");
1892 return true;
1893 }
1894
1896 return new VPVectorEndPointerRecipe(getOperand(0), getVFValue(), IndexedTy,
1897 Stride, getGEPNoWrapFlags(),
1898 getDebugLoc());
1899 }
1900
1901#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1902 /// Print the recipe.
1903 void print(raw_ostream &O, const Twine &Indent,
1904 VPSlotTracker &SlotTracker) const override;
1905#endif
1906};
1907
1908 /// A recipe to compute the pointers for widened memory accesses of
1908 /// SourceElementTy.
1910 public VPUnrollPartAccessor<1> {
1911 Type *SourceElementTy;
1912
1913public:
1916 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1917 GEPFlags, DL),
1918 SourceElementTy(SourceElementTy) {}
1919
1920 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1921
1922 void execute(VPTransformState &State) override;
1923
1924 Type *getSourceElementType() const { return SourceElementTy; }
1925
1926 bool usesFirstLaneOnly(const VPValue *Op) const override {
1928 "Op must be an operand of the recipe");
1929 return true;
1930 }
1931
1932 /// Returns true if the recipe only uses the first part of operand \p Op.
1933 bool usesFirstPartOnly(const VPValue *Op) const override {
1935 "Op must be an operand of the recipe");
1936 assert(getNumOperands() <= 2 && "must have at most two operands");
1937 return true;
1938 }
1939
1941 return new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
1943 }
1944
1945 /// Return true if this VPVectorPointerRecipe corresponds to part 0. Note that
1946 /// this is only accurate after the VPlan has been unrolled.
1947 bool isFirstPart() const { return getUnrollPart(*this) == 0; }
1948
1949 /// Return the cost of this VPVectorPointerRecipe.
1951 VPCostContext &Ctx) const override {
1952 // TODO: Compute accurate cost after retiring the legacy cost model.
1953 return 0;
1954 }
1955
1956#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1957 /// Print the recipe.
1958 void print(raw_ostream &O, const Twine &Indent,
1959 VPSlotTracker &SlotTracker) const override;
1960#endif
1961};
1962
1963/// A pure virtual base class for all recipes modeling header phis, including
1964/// phis for first order recurrences, pointer inductions and reductions. The
1965/// start value is the first operand of the recipe and the incoming value from
1966/// the backedge is the second operand.
1967///
1968/// Inductions are modeled using the following sub-classes:
1969/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1970/// starting at a specified value (zero for the main vector loop, the resume
1971/// value for the epilogue vector loop) and stepping by 1. The induction
1972/// controls exiting of the vector loop by comparing against the vector trip
1973/// count. Produces a single scalar PHI for the induction value per
1974/// iteration.
1975/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1976/// floating point inductions with arbitrary start and step values. Produces
1977/// a vector PHI per-part.
1978/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1979/// value of an IV with different start and step values. Produces a single
1980/// scalar value per iteration.
1981/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1982/// canonical or derived induction.
1983/// * VPWidenPointerInductionRecipe: Generates vector and scalar values for a
1984/// pointer induction. Produces either a vector PHI per-part or scalar values
1985/// per-lane based on the canonical induction.
1987 public VPPhiAccessors {
1988protected:
1989 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1990 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
1991 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>({Start}),
1992 UnderlyingInstr, DL) {}
1993
1994 const VPRecipeBase *getAsRecipe() const override { return this; }
1995
1996public:
1997 ~VPHeaderPHIRecipe() override = default;
1998
1999 /// Method to support type inquiry through isa, cast, and dyn_cast.
2000 static inline bool classof(const VPRecipeBase *B) {
2001 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2002 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2003 }
2004 static inline bool classof(const VPValue *V) {
2005 auto *B = V->getDefiningRecipe();
2006 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2007 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2008 }
2009
2010 /// Generate the phi nodes.
2011 void execute(VPTransformState &State) override = 0;
2012
2013 /// Return the cost of this header phi recipe.
2015 VPCostContext &Ctx) const override;
2016
2017#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2018 /// Print the recipe.
2019 void print(raw_ostream &O, const Twine &Indent,
2020 VPSlotTracker &SlotTracker) const override = 0;
2021#endif
2022
2023 /// Returns the start value of the phi, or nullptr if the recipe has no operands.
2025 return getNumOperands() == 0 ? nullptr : getOperand(0);
2026 }
2028 return getNumOperands() == 0 ? nullptr : getOperand(0);
2029 }
2030
2031 /// Update the start value of the recipe.
2033
2034 /// Returns the incoming value from the loop backedge (operand 1).
2036 return getOperand(1);
2037 }
2038
2039 /// Update the incoming value from the loop backedge.
2041
2042 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2043 /// to be a recipe.
2045 return *getBackedgeValue()->getDefiningRecipe();
2046 }
2047};
2048
2049/// Base class for widened inductions (VPWidenIntOrFpInductionRecipe and
2050/// VPWidenPointerInductionRecipe), providing shared functionality, including
2051/// retrieving the step value, induction descriptor and original phi node.
2053 const InductionDescriptor &IndDesc;
2054
2055public:
2056 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2057 VPValue *Step, const InductionDescriptor &IndDesc,
2058 DebugLoc DL)
2059 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2060 addOperand(Step);
2061 }
2062
2063 static inline bool classof(const VPRecipeBase *R) {
2064 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2065 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2066 }
2067
2068 static inline bool classof(const VPValue *V) {
2069 auto *R = V->getDefiningRecipe();
2070 return R && classof(R);
2071 }
2072
2073 static inline bool classof(const VPHeaderPHIRecipe *R) {
2074 return classof(static_cast<const VPRecipeBase *>(R));
2075 }
2076
2077 void execute(VPTransformState &State) override = 0;
2078
2079 /// Returns the step value of the induction (operand 1).
2081 const VPValue *getStepValue() const { return getOperand(1); }
2082
2083 /// Update the step value of the recipe.
2084 void setStepValue(VPValue *V) { setOperand(1, V); }
2085
2087 const VPValue *getVFValue() const { return getOperand(2); }
2088
2089 /// Returns the number of incoming values, also number of incoming blocks.
2090 /// Note that at the moment, widened induction recipes only have a single
2091 /// incoming value, their start value.
2092 unsigned getNumIncoming() const override { return 1; }
2093
2095
2096 /// Returns the induction descriptor for the recipe.
2097 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2098
2100 // TODO: All operands of base recipe must exist and be at same index in
2101 // derived recipe.
2103 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2104 }
2105
2107 // TODO: All operands of base recipe must exist and be at same index in
2108 // derived recipe.
2110 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2111 }
2112
2113 /// Returns true if the recipe only uses the first lane of operand \p Op.
2114 bool usesFirstLaneOnly(const VPValue *Op) const override {
2116 "Op must be an operand of the recipe");
2117 // The recipe creates its own wide start value, so it only requests the
2118 // first lane of the operand.
2119 // TODO: Remove once creating the start value is modeled separately.
2120 return Op == getStartValue() || Op == getStepValue();
2121 }
2122};
2123
2124/// A recipe for handling phi nodes of integer and floating-point inductions,
2125/// producing their vector values. This is an abstract recipe and must be
2126/// converted to concrete recipes before executing.
2128 TruncInst *Trunc;
2129
2130 // If this recipe is unrolled it will have 2 additional operands (5 in total).
2131 bool isUnrolled() const { return getNumOperands() == 5; }
2132
2133public:
2135 VPValue *VF, const InductionDescriptor &IndDesc,
2136 DebugLoc DL)
2137 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2138 Step, IndDesc, DL),
2139 Trunc(nullptr) {
2140 addOperand(VF);
2141 }
2142
2144 VPValue *VF, const InductionDescriptor &IndDesc,
2145 TruncInst *Trunc, DebugLoc DL)
2146 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2147 Step, IndDesc, DL),
2148 Trunc(Trunc) {
2149 addOperand(VF);
2151 (void)Metadata;
2152 if (Trunc)
2154 assert(Metadata.empty() && "unexpected metadata on Trunc");
2155 }
2156
2158
2164
2165 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2166
2167 void execute(VPTransformState &State) override {
2168 llvm_unreachable("cannot execute this recipe, should be expanded via "
2169 "expandVPWidenIntOrFpInductionRecipe");
2170 }
2171
2172#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2173 /// Print the recipe.
2174 void print(raw_ostream &O, const Twine &Indent,
2175 VPSlotTracker &SlotTracker) const override;
2176#endif
2177
2179 // If the recipe has been unrolled return the VPValue for the induction
2180 // increment.
2181 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2182 }
2183
2184 /// Returns the number of incoming values, also number of incoming blocks.
2185 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2186 /// incoming value, their start value.
2187 unsigned getNumIncoming() const override { return 1; }
2188
2189 /// Returns the TruncInst associated with this recipe, or nullptr if there is
2190 /// none.
2191 TruncInst *getTruncInst() { return Trunc; }
2192 const TruncInst *getTruncInst() const { return Trunc; }
2193
2194 /// Returns true if the induction is canonical, i.e. starting at 0 and
2195 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2196 /// same type as the canonical induction.
2197 bool isCanonical() const;
2198
2199 /// Returns the scalar type of the induction.
2201 return Trunc ? Trunc->getType()
2203 }
2204
2205 /// Returns the VPValue representing the value of this induction at
2206 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2207 /// take place.
2209 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2210 }
2211};
2212
2214 bool IsScalarAfterVectorization;
2215
2216public:
2217 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2218 /// Start, step \p Step and the number of elements unrolled \p NumUnrolledElems,
2219 /// typically VF*UF.
2221 VPValue *NumUnrolledElems,
2222 const InductionDescriptor &IndDesc,
2223 bool IsScalarAfterVectorization, DebugLoc DL)
2224 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2225 Step, IndDesc, DL),
2226 IsScalarAfterVectorization(IsScalarAfterVectorization) {
2227 addOperand(NumUnrolledElems);
2228 }
2229
2231
2235 getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization,
2236 getDebugLoc());
2237 }
2238
2239 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2240
2241 /// This recipe cannot be executed directly; it must be expanded first.
2242 void execute(VPTransformState &State) override {
2243 llvm_unreachable("cannot execute this recipe, should be expanded via "
2244 "expandVPWidenPointerInduction");
2245 };
2246
2247 /// Returns true if only scalar values will be generated.
2248 bool onlyScalarsGenerated(bool IsScalable);
2249
2250#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2251 /// Print the recipe.
2252 void print(raw_ostream &O, const Twine &Indent,
2253 VPSlotTracker &SlotTracker) const override;
2254#endif
2255};
2256
2257/// A recipe for widened phis. Incoming values are operands of the recipe and
2258/// their operand index corresponds to the incoming predecessor block. If the
2259/// recipe is placed in an entry block to a (non-replicate) region, it must have
2260/// exactly 2 incoming values, the first from the predecessor of the region and
2261/// the second from the exiting block of the region.
2263 public VPPhiAccessors {
2264 /// Name to use for the generated IR instruction for the widened phi.
2265 std::string Name;
2266
2267protected:
2268 const VPRecipeBase *getAsRecipe() const override { return this; }
2269
2270public:
2271 /// Create a new VPWidenPHIRecipe for \p Phi with optional start value \p Start,
2272 /// debug location \p DL and name \p Name.
2273 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2274 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2275 : VPSingleDefRecipe(VPDef::VPWidenPHISC, {}, Phi, DL), Name(Name.str()) {
2276 if (Start)
2277 addOperand(Start);
2278 }
2279
2282 getOperand(0), getDebugLoc(), Name);
2284 C->addOperand(Op);
2285 return C;
2286 }
2287
2288 ~VPWidenPHIRecipe() override = default;
2289
2290 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2291
2292 /// Generate the phi/select nodes.
2293 void execute(VPTransformState &State) override;
2294
2295#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2296 /// Print the recipe.
2297 void print(raw_ostream &O, const Twine &Indent,
2298 VPSlotTracker &SlotTracker) const override;
2299#endif
2300};
2301
2302/// A recipe for handling first-order recurrence phis. The start value is the
2303/// first operand of the recipe and the incoming value from the backedge is the
2304/// second operand.
2307 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2308
2309 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2310
2315
2316 void execute(VPTransformState &State) override;
2317
2318 /// Return the cost of this first-order recurrence phi recipe.
2320 VPCostContext &Ctx) const override;
2321
2322#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2323 /// Print the recipe.
2324 void print(raw_ostream &O, const Twine &Indent,
2325 VPSlotTracker &SlotTracker) const override;
2326#endif
2327
2328 /// Returns true if the recipe only uses the first lane of operand \p Op, i.e. if \p Op is the start value.
2329 bool usesFirstLaneOnly(const VPValue *Op) const override {
2331 "Op must be an operand of the recipe");
2332 return Op == getStartValue();
2333 }
2334};
2335
2336/// A recipe for handling reduction phis. The start value is the first operand
2337/// of the recipe and the incoming value from the backedge is the second
2338/// operand.
2340 public VPUnrollPartAccessor<2> {
2341 /// The recurrence kind of the reduction.
2342 const RecurKind Kind;
2343
2344 /// The phi is part of an in-loop reduction.
2345 bool IsInLoop;
2346
2347 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2348 bool IsOrdered;
2349
2350 /// When expanding the reduction PHI, the plan's VF element count is divided
2351 /// by this factor to form the reduction phi's VF.
2352 unsigned VFScaleFactor = 1;
2353
2354public:
2355 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2357 bool IsInLoop = false, bool IsOrdered = false,
2358 unsigned VFScaleFactor = 1)
2359 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start), Kind(Kind),
2360 IsInLoop(IsInLoop), IsOrdered(IsOrdered), VFScaleFactor(VFScaleFactor) {
2361 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2362 }
2363
2364 ~VPReductionPHIRecipe() override = default;
2365
2367 auto *R = new VPReductionPHIRecipe(
2369 *getOperand(0), IsInLoop, IsOrdered, VFScaleFactor);
2370 R->addOperand(getBackedgeValue());
2371 return R;
2372 }
2373
2374 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2375
2376 /// Generate the phi/select nodes.
2377 void execute(VPTransformState &State) override;
2378
2379 /// Get the factor that the VF of this recipe's output should be scaled by.
2380 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2381
2382#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2383 /// Print the recipe.
2384 void print(raw_ostream &O, const Twine &Indent,
2385 VPSlotTracker &SlotTracker) const override;
2386#endif
2387
2388 /// Returns the number of incoming values, also number of incoming blocks.
2389 /// Reduction phis have two incoming values: the start value and the value
2390 /// from the backedge.
2391 unsigned getNumIncoming() const override { return 2; }
2392
2393 /// Returns the recurrence kind of the reduction.
2394 RecurKind getRecurrenceKind() const { return Kind; }
2395
2396 /// Returns true, if the phi is part of an ordered reduction.
2397 bool isOrdered() const { return IsOrdered; }
2398
2399 /// Returns true, if the phi is part of an in-loop reduction.
2400 bool isInLoop() const { return IsInLoop; }
2401
2402 /// Returns true if the recipe only uses the first lane of operand \p Op, i.e. for ordered or in-loop reductions.
2403 bool usesFirstLaneOnly(const VPValue *Op) const override {
2405 "Op must be an operand of the recipe");
2406 return isOrdered() || isInLoop();
2407 }
2408};
2409
2410/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2411/// instructions.
2413public:
2414 /// The blend operation is a User of the incoming values and of their
2415 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2416 /// be omitted (implied by passing an odd number of operands) in which case
2417 /// all other incoming values are merged into it.
2419 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2420 assert(Operands.size() > 0 && "Expected at least one operand!");
2421 }
2422
2427
2428 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2429
2430 /// A normalized blend is one that has an odd number of operands, whereby the
2431 /// first operand does not have an associated mask.
2432 bool isNormalized() const { return getNumOperands() % 2; }
2433
2434 /// Return the number of incoming values, taking into account when normalized
2435 /// the first incoming value will have no mask.
2436 unsigned getNumIncomingValues() const {
2437 return (getNumOperands() + isNormalized()) / 2;
2438 }
2439
2440 /// Return incoming value number \p Idx.
2441 VPValue *getIncomingValue(unsigned Idx) const {
2442 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2443 }
2444
2445 /// Return mask number \p Idx.
2446 VPValue *getMask(unsigned Idx) const {
2447 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2448 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2449 }
2450
2451 /// Set mask number \p Idx to \p V.
2452 void setMask(unsigned Idx, VPValue *V) {
2453 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2454 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2455 }
2456
2457 void execute(VPTransformState &State) override {
2458 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2459 }
2460
2461 /// Return the cost of this VPBlendRecipe.
2462 InstructionCost computeCost(ElementCount VF,
2463 VPCostContext &Ctx) const override;
2464
2465#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2466 /// Print the recipe.
2467 void print(raw_ostream &O, const Twine &Indent,
2468 VPSlotTracker &SlotTracker) const override;
2469#endif
2470
2471 /// Returns true if the recipe only uses the first lane of operand \p Op.
2472 bool usesFirstLaneOnly(const VPValue *Op) const override {
2474 "Op must be an operand of the recipe");
2475 // Recursing through Blend recipes only, must terminate at header phis at the
2476 // latest.
2477 return all_of(users(),
2478 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2479 }
2480};
2481
2482/// A common base class for interleaved memory operations.
2483/// An interleaved memory operation is a memory access method that combines
2484/// multiple strided loads/stores into a single wide load/store with shuffles.
2485/// The first operand is the start address. The optional operands are, in order,
2486/// the stored values and the mask.
2488 public VPIRMetadata {
2490
2491 /// Indicates if the interleave group is in a conditional block and requires a
2492 /// mask.
2493 bool HasMask = false;
2494
2495 /// Indicates if gaps between members of the group need to be masked out or if
2496 /// unused gaps can be loaded speculatively.
2497 bool NeedsMaskForGaps = false;
2498
2499protected:
2500 VPInterleaveBase(const unsigned char SC,
2502 ArrayRef<VPValue *> Operands,
2503 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2504 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2505 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2506 NeedsMaskForGaps(NeedsMaskForGaps) {
2507 // TODO: extend the masked interleaved-group support to reversed access.
2508 assert((!Mask || !IG->isReverse()) &&
2509 "Reversed masked interleave-group not supported.");
2510 for (unsigned I = 0; I < IG->getFactor(); ++I)
2511 if (Instruction *Inst = IG->getMember(I)) {
2512 if (Inst->getType()->isVoidTy())
2513 continue;
2514 new VPValue(Inst, this);
2515 }
2516
2517 for (auto *SV : StoredValues)
2518 addOperand(SV);
2519 if (Mask) {
2520 HasMask = true;
2521 addOperand(Mask);
2522 }
2523 }
2524
2525public:
2526 VPInterleaveBase *clone() override = 0;
2527
2528 static inline bool classof(const VPRecipeBase *R) {
2529 return R->getVPDefID() == VPRecipeBase::VPInterleaveSC ||
2530 R->getVPDefID() == VPRecipeBase::VPInterleaveEVLSC;
2531 }
2532
2533 static inline bool classof(const VPUser *U) {
2534 auto *R = dyn_cast<VPRecipeBase>(U);
2535 return R && classof(R);
2536 }
2537
2538 /// Return the address accessed by this recipe.
2539 VPValue *getAddr() const {
2540 return getOperand(0); // Address is the 1st, mandatory operand.
2541 }
2542
2543 /// Return the mask used by this recipe. Note that a full mask is represented
2544 /// by a nullptr.
2545 VPValue *getMask() const {
2546 // Mask is optional and the last operand.
2547 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2548 }
2549
2550 /// Return true if the access needs a mask because of the gaps.
2551 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2552
2554
2555 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2556
2557 void execute(VPTransformState &State) override {
2558 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2559 }
2560
2561 /// Return the cost of this recipe.
2562 InstructionCost computeCost(ElementCount VF,
2563 VPCostContext &Ctx) const override;
2564
2565 /// Returns true if the recipe only uses the first lane of operand \p Op.
2566 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2567
2568 /// Returns the number of stored operands of this interleave group. Returns 0
2569 /// for load interleave groups.
2570 virtual unsigned getNumStoreOperands() const = 0;
2571
2572 /// Return the VPValues stored by this interleave group. If it is a load
2573 /// interleave group, return an empty ArrayRef.
2575 return ArrayRef<VPValue *>(op_end() -
2576 (getNumStoreOperands() + (HasMask ? 1 : 0)),
2578 }
2579};
2580
2581/// VPInterleaveRecipe is a recipe for transforming an interleave group of loads
2582/// or stores into one wide load/store and shuffles. The first operand of a
2583/// VPInterleaveRecipe is the address, followed by the stored values, followed
2584/// by an optional mask.
2586public:
2588 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2589 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2590 : VPInterleaveBase(VPDef::VPInterleaveSC, IG, Addr, StoredValues, Mask,
2591 NeedsMaskForGaps, MD, DL) {}
2592
2593 ~VPInterleaveRecipe() override = default;
2594
2598 needsMaskForGaps(), *this, getDebugLoc());
2599 }
2600
2601 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2602
2603 /// Generate the wide load or store, and shuffles.
2604 void execute(VPTransformState &State) override;
2605
2606#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2607 /// Print the recipe.
2608 void print(raw_ostream &O, const Twine &Indent,
2609 VPSlotTracker &SlotTracker) const override;
2610#endif
2611
2612 bool usesFirstLaneOnly(const VPValue *Op) const override {
2614 "Op must be an operand of the recipe");
2615 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2616 }
2617
2618 unsigned getNumStoreOperands() const override {
2619 return getNumOperands() - (getMask() ? 2 : 1);
2620 }
2621};
2622
2623/// A recipe for interleaved memory operations with vector-predication
2624/// intrinsics. The first operand is the address, the second operand is the
2625/// explicit vector length. Stored values and mask are optional operands.
2627public:
2629 : VPInterleaveBase(VPDef::VPInterleaveEVLSC, R.getInterleaveGroup(),
2630 ArrayRef<VPValue *>({R.getAddr(), &EVL}),
2631 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2632 R.getDebugLoc()) {
2633 assert(!getInterleaveGroup()->isReverse() &&
2634 "Reversed interleave-group with tail folding is not supported.");
2635 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2636 "supported for scalable vector.");
2637 }
2638
2639 ~VPInterleaveEVLRecipe() override = default;
2640
2642 llvm_unreachable("cloning not implemented yet");
2643 }
2644
2645 VP_CLASSOF_IMPL(VPDef::VPInterleaveEVLSC)
2646
2647 /// The VPValue of the explicit vector length (operand 1).
2648 VPValue *getEVL() const { return getOperand(1); }
2649
2650 /// Generate the wide load or store, and shuffles.
2651 void execute(VPTransformState &State) override;
2652
2653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2654 /// Print the recipe.
2655 void print(raw_ostream &O, const Twine &Indent,
2656 VPSlotTracker &SlotTracker) const override;
2657#endif
2658
2659 /// The recipe only uses the first lane of the address and of the EVL operand.
2660 bool usesFirstLaneOnly(const VPValue *Op) const override {
2662 "Op must be an operand of the recipe");
2663 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
2664 Op == getEVL();
2665 }
2666
2667 unsigned getNumStoreOperands() const override {
2668 return getNumOperands() - (getMask() ? 3 : 2);
2669 }
2670};
2671
2672/// A recipe to represent in-loop reduction operations, performing a reduction on
2673/// a vector operand into a scalar value, and adding the result to a chain.
2674/// The Operands are {ChainOp, VecOp, [Condition]}.
2676 /// The recurrence kind for the reduction in question.
2677 RecurKind RdxKind;
2678 bool IsOrdered;
2679 /// Whether the reduction is conditional.
2680 bool IsConditional = false;
2681
2682protected:
2683 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
2685 ArrayRef<VPValue *> Operands, VPValue *CondOp,
2686 bool IsOrdered, DebugLoc DL)
2687 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
2688 IsOrdered(IsOrdered) {
2689 if (CondOp) {
2690 IsConditional = true;
2691 addOperand(CondOp);
2692 }
2694 }
2695
2696public:
2698 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2699 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2700 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, I,
2701 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2702 IsOrdered, DL) {}
2703
2705 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2706 bool IsOrdered, DebugLoc DL = DebugLoc::getUnknown())
2707 : VPReductionRecipe(VPDef::VPReductionSC, RdxKind, FMFs, nullptr,
2708 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2709 IsOrdered, DL) {}
2710
2711 ~VPReductionRecipe() override = default;
2712
2714 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
2716 getCondOp(), IsOrdered, getDebugLoc());
2717 }
2718
2719 static inline bool classof(const VPRecipeBase *R) {
2720 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2721 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC ||
2722 R->getVPDefID() == VPRecipeBase::VPPartialReductionSC;
2723 }
2724
2725 static inline bool classof(const VPUser *U) {
2726 auto *R = dyn_cast<VPRecipeBase>(U);
2727 return R && classof(R);
2728 }
2729
2730 static inline bool classof(const VPValue *VPV) {
2731 const VPRecipeBase *R = VPV->getDefiningRecipe();
2732 return R && classof(R);
2733 }
2734
2735 static inline bool classof(const VPSingleDefRecipe *R) {
2736 return classof(static_cast<const VPRecipeBase *>(R));
2737 }
2738
2739 /// Generate the reduction in the loop.
2740 void execute(VPTransformState &State) override;
2741
2742 /// Return the cost of this VPReductionRecipe.
2743 InstructionCost computeCost(ElementCount VF,
2744 VPCostContext &Ctx) const override;
2745
2746#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2747 /// Print the recipe.
2748 void print(raw_ostream &O, const Twine &Indent,
2749 VPSlotTracker &SlotTracker) const override;
2750#endif
2751
2752 /// Return the recurrence kind for the in-loop reduction.
2753 RecurKind getRecurrenceKind() const { return RdxKind; }
2754 /// Return true if the in-loop reduction is ordered.
2755 bool isOrdered() const { return IsOrdered; };
2756 /// Return true if the in-loop reduction is conditional.
2757 bool isConditional() const { return IsConditional; };
2758 /// The VPValue of the scalar Chain being accumulated.
2759 VPValue *getChainOp() const { return getOperand(0); }
2760 /// The VPValue of the vector value to be reduced.
2761 VPValue *getVecOp() const { return getOperand(1); }
2762 /// The VPValue of the condition for the block, or nullptr if the reduction is not conditional.
2764 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2765 }
2766};
2767
2768/// A recipe for forming partial reductions. In the loop, an accumulator and
2769/// vector operand are added together and passed to the next iteration as the
2770/// next accumulator. After the loop body, the accumulator is reduced to a
2771/// scalar value.
2773 unsigned Opcode;
2774
2775 /// The divisor by which the VF of this recipe's output should be divided
2776 /// during execution.
2777 unsigned VFScaleFactor;
2778
2779public:
2781 VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
2782 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1, Cond,
2783 VFScaleFactor, ReductionInst) {}
2784 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2785 VPValue *Cond, unsigned ScaleFactor,
2786 Instruction *ReductionInst = nullptr)
2787 : VPReductionRecipe(VPDef::VPPartialReductionSC, RecurKind::Add,
2788 FastMathFlags(), ReductionInst,
2789 ArrayRef<VPValue *>({Op0, Op1}), Cond, false, {}),
2790 Opcode(Opcode), VFScaleFactor(ScaleFactor) {
2791 [[maybe_unused]] auto *AccumulatorRecipe =
2793 // When cloning as part of a VPExpressionRecipe the chain op could have been
2794 // replaced by a temporary VPValue, so it doesn't have a defining recipe.
2795 assert((!AccumulatorRecipe ||
2796 isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2797 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2798 "Unexpected operand order for partial reduction recipe");
2799 }
2800 ~VPPartialReductionRecipe() override = default;
2801
2803 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2804 getCondOp(), VFScaleFactor,
2806 }
2807
2808 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2809
2810 /// Generate the reduction in the loop.
2811 void execute(VPTransformState &State) override;
2812
2813 /// Return the cost of this VPPartialReductionRecipe.
2815 VPCostContext &Ctx) const override;
2816
2817 /// Get the binary op's opcode.
2818 unsigned getOpcode() const { return Opcode; }
2819
2820 /// Get the factor that the VF of this recipe's output should be scaled by.
2821 unsigned getVFScaleFactor() const { return VFScaleFactor; }
2822
2823#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2824 /// Print the recipe.
2825 void print(raw_ostream &O, const Twine &Indent,
2826 VPSlotTracker &SlotTracker) const override;
2827#endif
2828};
2829
2830/// A recipe to represent in-loop reduction operations with vector-predication
2831/// intrinsics, performing a reduction on a vector operand with the explicit
2832/// vector length (EVL) into a scalar value, and adding the result to a chain.
2833/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2835public:
2839 VPDef::VPReductionEVLSC, R.getRecurrenceKind(),
2840 R.getFastMathFlags(),
2842 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2843 R.isOrdered(), DL) {}
2844
2845 ~VPReductionEVLRecipe() override = default;
2846
2848 llvm_unreachable("cloning not implemented yet");
2849 }
2850
2851 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2852
2853 /// Generate the reduction in the loop.
2854 void execute(VPTransformState &State) override;
2855
2856#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2857 /// Print the recipe.
2858 void print(raw_ostream &O, const Twine &Indent,
2859 VPSlotTracker &SlotTracker) const override;
2860#endif
2861
2862 /// The VPValue of the explicit vector length (operand 2).
2863 VPValue *getEVL() const { return getOperand(2); }
2864
2865 /// Returns true if the recipe only uses the first lane of operand \p Op.
2866 bool usesFirstLaneOnly(const VPValue *Op) const override {
2868 "Op must be an operand of the recipe");
2869 return Op == getEVL();
2870 }
2871};
2872
2873/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2874/// copies of the original scalar type, one per lane, instead of producing a
2875/// single copy of widened type for all lanes. If the instruction is known to be
2876/// a single scalar, only one copy, per lane zero, will be generated.
2878 public VPIRMetadata {
2879 /// Indicator if only a single replica per lane is needed.
2880 bool IsSingleScalar;
2881
2882 /// Indicator if the replicas are also predicated.
2883 bool IsPredicated;
2884
2885public:
2887 bool IsSingleScalar, VPValue *Mask = nullptr,
2888 VPIRMetadata Metadata = {})
2889 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2890 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
2891 IsPredicated(Mask) {
2892 if (Mask)
2893 addOperand(Mask);
2894 }
2895
2896 ~VPReplicateRecipe() override = default;
2897
2899 auto *Copy =
2900 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsSingleScalar,
2901 isPredicated() ? getMask() : nullptr, *this);
2902 Copy->transferFlags(*this);
2903 return Copy;
2904 }
2905
2906 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2907
2908 /// Generate replicas of the desired Ingredient. Replicas will be generated
2909 /// for all parts and lanes unless a specific part and lane are specified in
2910 /// the \p State.
2911 void execute(VPTransformState &State) override;
2912
2913 /// Return the cost of this VPReplicateRecipe.
2914 InstructionCost computeCost(ElementCount VF,
2915 VPCostContext &Ctx) const override;
2916
2917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2918 /// Print the recipe.
2919 void print(raw_ostream &O, const Twine &Indent,
2920 VPSlotTracker &SlotTracker) const override;
2921#endif
2922
/// Returns the IsSingleScalar flag this recipe was constructed with.
2923 bool isSingleScalar() const { return IsSingleScalar; }
2924
/// Returns true if the recipe was constructed with a non-null mask.
2925 bool isPredicated() const { return IsPredicated; }
2926
2927 /// Returns true if the recipe only uses the first lane of operand \p Op.
2928 bool usesFirstLaneOnly(const VPValue *Op) const override {
2930 "Op must be an operand of the recipe");
2931 return isSingleScalar();
2932 }
2933
2934 /// Returns true if the recipe uses scalars of operand \p Op.
2935 bool usesScalars(const VPValue *Op) const override {
2937 "Op must be an operand of the recipe");
2938 return true;
2939 }
2940
2941 /// Returns true if the recipe is used by a widened recipe via an intervening
2942 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2943 /// in a vector.
2944 bool shouldPack() const;
2945
2946 /// Return the mask of a predicated VPReplicateRecipe.
2948 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2949 return getOperand(getNumOperands() - 1);
2950 }
2951
/// Returns the opcode of the underlying instruction.
2952 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2953};
2954
2955/// A recipe for generating conditional branches on the bits of a mask.
2957public:
2959 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {BlockInMask}, DL) {}
2960
2963 }
2964
2965 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2966
2967 /// Generate the extraction of the appropriate bit from the block mask and the
2968 /// conditional branch.
2969 void execute(VPTransformState &State) override;
2970
2971 /// Return the cost of this VPBranchOnMaskRecipe.
2972 InstructionCost computeCost(ElementCount VF,
2973 VPCostContext &Ctx) const override;
2974
2975#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2976 /// Print the recipe.
2977 void print(raw_ostream &O, const Twine &Indent,
2978 VPSlotTracker &SlotTracker) const override {
2979 O << Indent << "BRANCH-ON-MASK ";
2981 }
2982#endif
2983
2984 /// Returns true if the recipe uses scalars of operand \p Op.
2985 bool usesScalars(const VPValue *Op) const override {
2987 "Op must be an operand of the recipe");
2988 return true;
2989 }
2990};
2991
2992/// A recipe to combine multiple recipes into a single 'expression' recipe,
2993/// which should be considered a single entity for cost-modeling and transforms.
2994/// The recipe needs to be 'decomposed', i.e. replaced by its individual
2995/// expression recipes, before execute. The individual expression recipes are
2996/// completely disconnected from the def-use graph of other recipes not part of
2997/// the expression. Def-use edges between pairs of expression recipes remain
2998/// intact, whereas every edge between an expression recipe and a recipe outside
2999/// the expression is elevated to connect the non-expression recipe with the
3000/// VPExpressionRecipe itself.
3001class VPExpressionRecipe : public VPSingleDefRecipe {
3002 /// Recipes included in this VPExpressionRecipe. This could contain
3003 /// duplicates.
3004 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3005
3006 /// Temporary VPValues used for external operands of the expression, i.e.
3007 /// operands not defined by recipes in the expression.
3008 SmallVector<VPValue *> LiveInPlaceholders;
3009
3010 enum class ExpressionTypes {
3011 /// Represents an inloop extended reduction operation, performing a
3012 /// reduction on an extended vector operand into a scalar value, and adding
3013 /// the result to a chain.
3014 ExtendedReduction,
3015 /// Represent an inloop multiply-accumulate reduction, multiplying the
3016 /// extended vector operands, performing a reduction.add on the result, and
3017 /// adding the scalar result to a chain.
3018 ExtMulAccReduction,
3019 /// Represent an inloop multiply-accumulate reduction, multiplying the
3020 /// vector operands, performing a reduction.add on the result, and adding
3021 /// the scalar result to a chain.
3022 MulAccReduction,
3023 /// Represent an inloop multiply-accumulate reduction, multiplying the
3024 /// extended vector operands, negating the multiplication, performing a
3025 /// reduction.add on the result, and adding the scalar result to a chain.
3026 ExtNegatedMulAccReduction,
3027 };
3028
3029 /// Type of the expression.
3030 ExpressionTypes ExpressionType;
3031
3032 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3033 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3034 /// in the expression) are replaced by temporary VPValues and the original
3035 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3036 /// as needed (excluding last) to ensure they are only used by other recipes
3037 /// in the expression.
3038 VPExpressionRecipe(ExpressionTypes ExpressionType,
3039 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3040
3041public:
3043 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3045 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3048 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3049 {Ext0, Ext1, Mul, Red}) {}
3052 VPReductionRecipe *Red)
3053 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3054 {Ext0, Ext1, Mul, Sub, Red}) {
3055 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3056 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3057 "Expected an add reduction");
3058 assert(getNumOperands() >= 3 && "Expected at least three operands");
3059 [[maybe_unused]] auto *SubConst = dyn_cast<ConstantInt>(getOperand(2)->getLiveInIRValue());
3060 assert(SubConst && SubConst->getValue() == 0 &&
3061 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3062 }
3063
3065 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3066 for (auto *R : reverse(ExpressionRecipes)) {
3067 if (ExpressionRecipesSeen.insert(R).second)
3068 delete R;
3069 }
3070 for (VPValue *T : LiveInPlaceholders)
3071 delete T;
3072 }
3073
3074 VP_CLASSOF_IMPL(VPDef::VPExpressionSC)
3075
/// Create a copy of this expression. Every bundled recipe is cloned, uses of
/// the original bundled recipes inside the clones are redirected to the
/// matching clones, and placeholder operands are replaced by this recipe's
/// own operands before a new VPExpressionRecipe is built from the clones.
3076 VPExpressionRecipe *clone() override {
3077 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3078 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3079 for (auto *R : ExpressionRecipes)
3080 NewExpressiondRecipes.push_back(R->clone());
3081 for (auto *New : NewExpressiondRecipes) {
// The clones were pushed in ExpressionRecipes order, so Idx maps each
// original recipe to its clone.
3082 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3083 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3084 // Update placeholder operands in the cloned recipe to use the external
3085 // operands, to be internalized when the cloned expression is constructed.
3086 for (const auto &[Placeholder, OutsideOp] :
3087 zip(LiveInPlaceholders, operands()))
3088 New->replaceUsesOfWith(Placeholder, OutsideOp);
3089 }
3090 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3091 }
3092
3093 /// Return the VPValue to use to infer the result type of the recipe.
3095 unsigned OpIdx =
3096 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3097 : 1;
3098 return getOperand(getNumOperands() - OpIdx);
3099 }
3100
3101 /// Insert the recipes of the expression back into the VPlan, directly before
3102 /// the current recipe. Leaves the expression recipe empty, which must be
3103 /// removed before codegen.
3104 void decompose();
3105
/// Returns the VF scale factor of the last recipe in the expression if that
/// recipe is a VPPartialReductionRecipe, and 1 otherwise.
3106 unsigned getVFScaleFactor() const {
3107 auto *PR = dyn_cast<VPPartialReductionRecipe>(ExpressionRecipes.back());
3108 return PR ? PR->getVFScaleFactor() : 1;
3109 }
3110
3111 /// Method for generating code, must not be called as this recipe is abstract.
3112 void execute(VPTransformState &State) override {
3113 llvm_unreachable("recipe must be removed before execute");
3114 }
3115
3117 VPCostContext &Ctx) const override;
3118
3119#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3120 /// Print the recipe.
3121 void print(raw_ostream &O, const Twine &Indent,
3122 VPSlotTracker &SlotTracker) const override;
3123#endif
3124
3125 /// Returns true if this expression contains recipes that may read from or
3126 /// write to memory.
3127 bool mayReadOrWriteMemory() const;
3128
3129 /// Returns true if this expression contains recipes that may have side
3130 /// effects.
3131 bool mayHaveSideEffects() const;
3132
3133 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3134 bool isSingleScalar() const;
3135};
3136
3137/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3138/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3139/// order to merge values that are set under such a branch and feed their uses.
3140/// The phi nodes can be scalar or vector depending on the users of the value.
3141/// This recipe works in concert with VPBranchOnMaskRecipe.
3143public:
3144 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3145 /// nodes after merging back from a Branch-on-Mask.
3147 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
3148 ~VPPredInstPHIRecipe() override = default;
3149
3151 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3152 }
3153
3154 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
3155
3156 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3157 /// retain SSA form.
3158 void execute(VPTransformState &State) override;
3159
3160 /// Return the cost of this VPPredInstPHIRecipe.
3162 VPCostContext &Ctx) const override {
3163 // TODO: Compute accurate cost after retiring the legacy cost model.
3164 return 0;
3165 }
3166
3167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3168 /// Print the recipe.
3169 void print(raw_ostream &O, const Twine &Indent,
3170 VPSlotTracker &SlotTracker) const override;
3171#endif
3172
3173 /// Returns true if the recipe uses scalars of operand \p Op.
3174 bool usesScalars(const VPValue *Op) const override {
3176 "Op must be an operand of the recipe");
3177 return true;
3178 }
3179};
3180
3181/// A common base class for widening memory operations. An optional mask can be
3182/// provided as the last operand.
3184 public VPIRMetadata {
3185protected:
3187
3188 /// Alignment information for this memory access.
3190
3191 /// Whether the accessed addresses are consecutive.
3193
3194 /// Whether the consecutive accessed addresses are in reverse order.
3196
3197 /// Whether the memory access is masked.
3198 bool IsMasked = false;
3199
/// Register \p Mask as the mask of this access: it is appended as an
/// additional operand and IsMasked is set. A null \p Mask leaves the recipe
/// unmasked. Asserts that no mask has been set before.
3200 void setMask(VPValue *Mask) {
3201 assert(!IsMasked && "cannot re-set mask");
3202 if (!Mask)
3203 return;
3204 addOperand(Mask);
3205 IsMasked = true;
3206 }
3207
3208 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3209 std::initializer_list<VPValue *> Operands,
3210 bool Consecutive, bool Reverse,
3211 const VPIRMetadata &Metadata, DebugLoc DL)
3212 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3214 Reverse(Reverse) {
3215 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3217 "Reversed acccess without VPVectorEndPointerRecipe address?");
3218 }
3219
3220public:
3222 llvm_unreachable("cloning not supported");
3223 }
3224
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// recipes whose VPDef ID is VPWidenLoadSC, VPWidenStoreSC, VPWidenLoadEVLSC
/// or VPWidenStoreEVLSC.
3225 static inline bool classof(const VPRecipeBase *R) {
3226 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
3227 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
3228 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
3229 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
3230 }
3231
/// Type inquiry for VPUser: true if \p U is a VPRecipeBase that the
/// VPRecipeBase overload of classof accepts.
3232 static inline bool classof(const VPUser *U) {
3233 auto *R = dyn_cast<VPRecipeBase>(U);
3234 return R && classof(R);
3235 }
3236
3237 /// Return whether the loaded-from / stored-to addresses are consecutive.
3238 bool isConsecutive() const { return Consecutive; }
3239
3240 /// Return whether the consecutive loaded/stored addresses are in reverse
3241 /// order.
3242 bool isReverse() const { return Reverse; }
3243
3244 /// Return the address accessed by this recipe.
3245 VPValue *getAddr() const { return getOperand(0); }
3246
3247 /// Returns true if the recipe is masked.
3248 bool isMasked() const { return IsMasked; }
3249
3250 /// Return the mask used by this recipe. Note that a full mask is represented
3251 /// by a nullptr.
3252 VPValue *getMask() const {
3253 // Mask is optional and therefore the last operand.
3254 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3255 }
3256
3257 /// Returns the alignment of the memory access.
3258 Align getAlign() const { return Alignment; }
3259
3260 /// Generate the wide load/store. Not meant to be reached on this common
/// base: calling it is reported via llvm_unreachable.
3261 void execute(VPTransformState &State) override {
3262 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3263 }
3264
3265 /// Return the cost of this VPWidenMemoryRecipe.
3266 InstructionCost computeCost(ElementCount VF,
3267 VPCostContext &Ctx) const override;
3268
3270};
3271
3272/// A recipe for widening load operations, using the address to load from and an
3273/// optional mask.
3275 public VPValue {
3277 bool Consecutive, bool Reverse,
3278 const VPIRMetadata &Metadata, DebugLoc DL)
3279 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3280 Reverse, Metadata, DL),
3281 VPValue(this, &Load) {
3282 setMask(Mask);
3283 }
3284
3287 getMask(), Consecutive, Reverse, *this,
3288 getDebugLoc());
3289 }
3290
3291 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3292
3293 /// Generate a wide load or gather.
3294 void execute(VPTransformState &State) override;
3295
3296#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3297 /// Print the recipe.
3298 void print(raw_ostream &O, const Twine &Indent,
3299 VPSlotTracker &SlotTracker) const override;
3300#endif
3301
3302 /// Returns true if the recipe only uses the first lane of operand \p Op.
3303 bool usesFirstLaneOnly(const VPValue *Op) const override {
3305 "Op must be an operand of the recipe");
3306 // Widened, consecutive load operations only demand the first lane of
3307 // their address.
3308 return Op == getAddr() && isConsecutive();
3309 }
3310};
3311
3312/// A recipe for widening load operations with vector-predication intrinsics,
3313/// using the address to load from, the explicit vector length and an optional
3314/// mask.
3315struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3317 VPValue *Mask)
3318 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3319 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3320 L.getDebugLoc()),
3321 VPValue(this, &getIngredient()) {
3322 setMask(Mask);
3323 }
3324
3325 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3326
3327 /// Return the EVL operand.
3328 VPValue *getEVL() const { return getOperand(1); }
3329
3330 /// Generate the wide load or gather.
3331 void execute(VPTransformState &State) override;
3332
3333 /// Return the cost of this VPWidenLoadEVLRecipe.
3335 VPCostContext &Ctx) const override;
3336
3337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3338 /// Print the recipe.
3339 void print(raw_ostream &O, const Twine &Indent,
3340 VPSlotTracker &SlotTracker) const override;
3341#endif
3342
3343 /// Returns true if the recipe only uses the first lane of operand \p Op.
3344 bool usesFirstLaneOnly(const VPValue *Op) const override {
3346 "Op must be an operand of the recipe");
3347 // Widened loads only demand the first lane of EVL and consecutive loads
3348 // only demand the first lane of their address.
3349 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3350 }
3351};
3352
3353/// A recipe for widening store operations, using the stored value, the address
3354/// to store to and an optional mask.
/// Build a widened store for \p Store with operands {\p Addr, \p StoredVal}.
/// \p Consecutive, \p Reverse, \p Metadata and \p DL are forwarded to
/// VPWidenMemoryRecipe; \p Mask is registered through setMask().
3356 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3357 VPValue *Mask, bool Consecutive, bool Reverse,
3358 const VPIRMetadata &Metadata, DebugLoc DL)
3359 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3360 Consecutive, Reverse, Metadata, DL) {
3361 setMask(Mask);
3362 }
3363
3369
3370 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3371
3372 /// Return the value stored by this recipe.
3373 VPValue *getStoredValue() const { return getOperand(1); }
3374
3375 /// Generate a wide store or scatter.
3376 void execute(VPTransformState &State) override;
3377
3378#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3379 /// Print the recipe.
3380 void print(raw_ostream &O, const Twine &Indent,
3381 VPSlotTracker &SlotTracker) const override;
3382#endif
3383
3384 /// Returns true if the recipe only uses the first lane of operand \p Op.
3385 bool usesFirstLaneOnly(const VPValue *Op) const override {
3387 "Op must be an operand of the recipe");
3388 // Widened, consecutive stores only demand the first lane of their address,
3389 // unless the same operand is also stored.
3390 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3391 }
3392};
3393
3394/// A recipe for widening store operations with vector-predication intrinsics,
3395/// using the value to store, the address to store to, the explicit vector
3396/// length and an optional mask.
3399 VPValue *Mask)
3400 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3401 {Addr, S.getStoredValue(), &EVL}, S.isConsecutive(),
3402 S.isReverse(), S, S.getDebugLoc()) {
3403 setMask(Mask);
3404 }
3405
3406 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3407
3408 /// Return the value stored by this recipe.
3409 VPValue *getStoredValue() const { return getOperand(1); }
3410
3411 /// Return the EVL operand.
3412 VPValue *getEVL() const { return getOperand(2); }
3413
3414 /// Generate the wide store or scatter.
3415 void execute(VPTransformState &State) override;
3416
3417 /// Return the cost of this VPWidenStoreEVLRecipe.
3419 VPCostContext &Ctx) const override;
3420
3421#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3422 /// Print the recipe.
3423 void print(raw_ostream &O, const Twine &Indent,
3424 VPSlotTracker &SlotTracker) const override;
3425#endif
3426
3427 /// Returns true if the recipe only uses the first lane of operand \p Op.
3428 bool usesFirstLaneOnly(const VPValue *Op) const override {
3430 "Op must be an operand of the recipe");
3431 if (Op == getEVL()) {
3432 assert(getStoredValue() != Op && "unexpected store of EVL");
3433 return true;
3434 }
3435 // Widened, consecutive memory operations only demand the first lane of
3436 // their address, unless the same operand is also stored. The latter can
3437 // happen with opaque pointers.
3438 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3439 }
3440};
3441
3442/// Recipe to expand a SCEV expression.
3444 const SCEV *Expr;
3445
3446public:
3448 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr) {}
3449
3450 ~VPExpandSCEVRecipe() override = default;
3451
/// Create a new VPExpandSCEVRecipe for the same SCEV expression.
3452 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3453
3454 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3455
/// Must not be called: reaching this is reported via llvm_unreachable, as
/// SCEV expressions are expected to be expanded before final execution.
3456 void execute(VPTransformState &State) override {
3457 llvm_unreachable("SCEV expressions must be expanded before final execute");
3458 }
3459
3460 /// Return the cost of this VPExpandSCEVRecipe.
3462 VPCostContext &Ctx) const override {
3463 // TODO: Compute accurate cost after retiring the legacy cost model.
3464 return 0;
3465 }
3466
3467#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3468 /// Print the recipe.
3469 void print(raw_ostream &O, const Twine &Indent,
3470 VPSlotTracker &SlotTracker) const override;
3471#endif
3472
/// Return the SCEV expression held by this recipe.
3473 const SCEV *getSCEV() const { return Expr; }
3474};
3475
3476/// Canonical scalar induction phi of the vector loop. Starting at the specified
3477/// start value (either 0 or the resume value when vectorizing the epilogue
3478/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3479/// canonical induction variable.
3481public:
3483 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3484
3485 ~VPCanonicalIVPHIRecipe() override = default;
3486
3488 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3489 R->addOperand(getBackedgeValue());
3490 return R;
3491 }
3492
3493 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3494
/// Must not be called: reaching this is reported via llvm_unreachable, as
/// the recipe is expected to have been replaced by a scalar phi recipe.
3495 void execute(VPTransformState &State) override {
3496 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3497 "scalar phi recipe");
3498 }
3499
3500#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3501 /// Print the recipe.
3502 void print(raw_ostream &O, const Twine &Indent,
3503 VPSlotTracker &SlotTracker) const override;
3504#endif
3505
3506 /// Returns the scalar type of the induction.
3508 return getStartValue()->getLiveInIRValue()->getType();
3509 }
3510
3511 /// Returns true if the recipe only uses the first lane of operand \p Op.
3512 bool usesFirstLaneOnly(const VPValue *Op) const override {
3514 "Op must be an operand of the recipe");
3515 return true;
3516 }
3517
3518 /// Returns true if the recipe only uses the first part of operand \p Op.
3519 bool usesFirstPartOnly(const VPValue *Op) const override {
3521 "Op must be an operand of the recipe");
3522 return true;
3523 }
3524
3525 /// Return the cost of this VPCanonicalIVPHIRecipe.
3527 VPCostContext &Ctx) const override {
3528 // For now, match the behavior of the legacy cost model.
3529 return 0;
3530 }
3531};
3532
3533/// A recipe for generating the active lane mask for the vector loop that is
3534/// used to predicate the vector operations.
3535/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3536/// remove VPActiveLaneMaskPHIRecipe.
3538public:
3540 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3541 DL) {}
3542
3543 ~VPActiveLaneMaskPHIRecipe() override = default;
3544
3547 if (getNumOperands() == 2)
3548 R->addOperand(getOperand(1));
3549 return R;
3550 }
3551
3552 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3553
3554 /// Generate the active lane mask phi of the vector loop.
3555 void execute(VPTransformState &State) override;
3556
3557#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3558 /// Print the recipe.
3559 void print(raw_ostream &O, const Twine &Indent,
3560 VPSlotTracker &SlotTracker) const override;
3561#endif
3562};
3563
3564/// A recipe for generating the phi node for the current index of elements,
3565/// adjusted in accordance with EVL value. It starts at the start value of the
3566/// canonical induction and gets incremented by EVL in each iteration of the
3567/// vector loop.
3569public:
3571 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3572
3573 ~VPEVLBasedIVPHIRecipe() override = default;
3574
3576 llvm_unreachable("cloning not implemented yet");
3577 }
3578
3579 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3580
/// Must not be called: reaching this is reported via llvm_unreachable, as
/// the recipe is expected to have been replaced by a scalar phi recipe.
3581 void execute(VPTransformState &State) override {
3582 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3583 "scalar phi recipe");
3584 }
3585
3586 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3588 VPCostContext &Ctx) const override {
3589 // For now, match the behavior of the legacy cost model.
3590 return 0;
3591 }
3592
3593 /// Returns true if the recipe only uses the first lane of operand \p Op.
3594 bool usesFirstLaneOnly(const VPValue *Op) const override {
3596 "Op must be an operand of the recipe");
3597 return true;
3598 }
3599
3600#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3601 /// Print the recipe.
3602 void print(raw_ostream &O, const Twine &Indent,
3603 VPSlotTracker &SlotTracker) const override;
3604#endif
3605};
3606
3607/// A Recipe for widening the canonical induction variable of the vector loop.
3609 public VPUnrollPartAccessor<1> {
3610public:
3612 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3613
3614 ~VPWidenCanonicalIVRecipe() override = default;
3615
3620
3621 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3622
3623 /// Generate a canonical vector induction variable of the vector loop, with
3624 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3625 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3626 void execute(VPTransformState &State) override;
3627
3628 /// Return the cost of this VPWidenCanonicalIVRecipe.
3630 VPCostContext &Ctx) const override {
3631 // TODO: Compute accurate cost after retiring the legacy cost model.
3632 return 0;
3633 }
3634
3635#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3636 /// Print the recipe.
3637 void print(raw_ostream &O, const Twine &Indent,
3638 VPSlotTracker &SlotTracker) const override;
3639#endif
3640};
3641
3642/// A recipe for converting the input value \p IV value to the corresponding
3643/// value of an IV with different start and step values, using Start + IV *
3644/// Step.
3646 /// Kind of the induction.
3648 /// If not nullptr, the floating point induction binary operator. Must be set
3649 /// for floating point inductions.
3650 const FPMathOperator *FPBinOp;
3651
3652 /// Name to use for the generated IR instruction for the derived IV.
3653 std::string Name;
3654
3655public:
3657 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3658 const Twine &Name = "")
3660 IndDesc.getKind(),
3661 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3662 Start, CanonicalIV, Step, Name) {}
3663
3665 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3666 VPValue *Step, const Twine &Name = "")
3667 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3668 FPBinOp(FPBinOp), Name(Name.str()) {}
3669
3670 ~VPDerivedIVRecipe() override = default;
3671
3673 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3674 getStepValue());
3675 }
3676
3677 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3678
3679 /// Generate the transformed value of the induction at offset StartValue (1.
3680 /// operand) + IV (2. operand) * StepValue (3. operand).
3681 void execute(VPTransformState &State) override;
3682
3683 /// Return the cost of this VPDerivedIVRecipe.
3685 VPCostContext &Ctx) const override {
3686 // TODO: Compute accurate cost after retiring the legacy cost model.
3687 return 0;
3688 }
3689
3690#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3691 /// Print the recipe.
3692 void print(raw_ostream &O, const Twine &Indent,
3693 VPSlotTracker &SlotTracker) const override;
3694#endif
3695
3697 return getStartValue()->getLiveInIRValue()->getType();
3698 }
3699
/// Return the start value of the derived IV (operand 0).
3700 VPValue *getStartValue() const { return getOperand(0); }
/// Return the step value of the derived IV (operand 2).
3701 VPValue *getStepValue() const { return getOperand(2); }
3702
3703 /// Returns true if the recipe only uses the first lane of operand \p Op.
3704 bool usesFirstLaneOnly(const VPValue *Op) const override {
3706 "Op must be an operand of the recipe");
3707 return true;
3708 }
3709};
3710
3711/// A recipe for handling phi nodes of integer and floating-point inductions,
3712/// producing their scalar values.
3714 public VPUnrollPartAccessor<3> {
3715 Instruction::BinaryOps InductionOpcode;
3716
3717public:
3720 DebugLoc DL)
3721 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3722 ArrayRef<VPValue *>({IV, Step, VF}), FMFs, DL),
3723 InductionOpcode(Opcode) {}
3724
3726 VPValue *Step, VPValue *VF,
3729 IV, Step, VF, IndDesc.getInductionOpcode(),
3730 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3731 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3732 : FastMathFlags(),
3733 DL) {}
3734
3735 ~VPScalarIVStepsRecipe() override = default;
3736
3738 return new VPScalarIVStepsRecipe(
3739 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
3741 getDebugLoc());
3742 }
3743
3744 /// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3745 /// this is only accurate after the VPlan has been unrolled.
3746 bool isPart0() const { return getUnrollPart(*this) == 0; }
3747
3748 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3749
3750 /// Generate the scalarized versions of the phi node as needed by their users.
3751 void execute(VPTransformState &State) override;
3752
3753 /// Return the cost of this VPScalarIVStepsRecipe.
3755 VPCostContext &Ctx) const override {
3756 // TODO: Compute accurate cost after retiring the legacy cost model.
3757 return 0;
3758 }
3759
3760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3761 /// Print the recipe.
3762 void print(raw_ostream &O, const Twine &Indent,
3763 VPSlotTracker &SlotTracker) const override;
3764#endif
3765
/// Return the step operand (operand 1).
3766 VPValue *getStepValue() const { return getOperand(1); }
3767
3768 /// Returns true if the recipe only uses the first lane of operand \p Op.
3769 bool usesFirstLaneOnly(const VPValue *Op) const override {
3771 "Op must be an operand of the recipe");
3772 return true;
3773 }
3774};
3775
3776/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
3777/// types implementing VPPhiAccessors. Used by isa<> & co.
3779 static inline bool isPossible(const VPRecipeBase *f) {
3780 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
3782 }
3783};
3784/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
3785/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
3786template <typename SrcTy>
3787struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
3788
3790
3791 /// doCast is used by cast<>. Dispatches on the VPDef ID of \p R to pick the
/// recipe type to cast through: VPPhi, VPIRPhi or VPWidenPHIRecipe for the
/// IDs listed in the switch, and VPHeaderPHIRecipe for every other ID.
3792 static inline VPPhiAccessors *doCast(SrcTy R) {
// The lambda yields a pointer-to-const; constness is cast away on return.
3793 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
3794 switch (R->getVPDefID()) {
3795 case VPDef::VPInstructionSC:
3796 return cast<VPPhi>(R);
3797 case VPDef::VPIRInstructionSC:
3798 return cast<VPIRPhi>(R);
3799 case VPDef::VPWidenPHISC:
3800 return cast<VPWidenPHIRecipe>(R);
3801 default:
3802 return cast<VPHeaderPHIRecipe>(R);
3803 }
3804 }());
3805 }
3806
3807 /// doCastIfPossible is used by dyn_cast<>. Returns nullptr when
/// Self::isPossible rejects \p f, and the result of doCast otherwise.
3808 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
3809 if (!Self::isPossible(f))
3810 return nullptr;
3811 return doCast(f);
3812 }
3813};
3814template <>
3817template <>
3820
3821/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3822/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3823/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3824class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
3825 friend class VPlan;
3826
3827 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
/// Builds a block named \p Name; if \p Recipe is non-null it is appended to
/// the new block.
3828 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3829 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3830 if (Recipe)
3831 appendRecipe(Recipe);
3832 }
3834public:
3836
3837protected:
3838 /// The VPRecipes held in the order of output instructions to generate.
3840
/// Construct a block named \p Name, forwarding \p BlockSC (rather than
/// VPBasicBlockSC) as the block ID to VPBlockBase. No recipe is added.
3841 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3842 : VPBlockBase(BlockSC, Name.str()) {}
3843
3844public:
/// Empties the recipe list by repeatedly removing its last element.
/// NOTE(review): presumably removal also destroys the recipe (owning list);
/// confirm against the definition of RecipeListTy.
3845 ~VPBasicBlock() override {
3846 while (!Recipes.empty())
3847 Recipes.pop_back();
3848 }
3849
3850 /// Instruction iterators...
3855
3856 //===--------------------------------------------------------------------===//
3857 /// Recipe iterator methods
3858 ///
3859 inline iterator begin() { return Recipes.begin(); }
3860 inline const_iterator begin() const { return Recipes.begin(); }
3861 inline iterator end() { return Recipes.end(); }
3862 inline const_iterator end() const { return Recipes.end(); }
3863
3864 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3865 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3866 inline reverse_iterator rend() { return Recipes.rend(); }
3867 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3868
3869 inline size_t size() const { return Recipes.size(); }
3870 inline bool empty() const { return Recipes.empty(); }
3871 inline const VPRecipeBase &front() const { return Recipes.front(); }
3872 inline VPRecipeBase &front() { return Recipes.front(); }
3873 inline const VPRecipeBase &back() const { return Recipes.back(); }
3874 inline VPRecipeBase &back() { return Recipes.back(); }
3875
3876 /// Returns a reference to the list of recipes.
3878
3879 /// Returns a pointer to a member of the recipe list.
3880 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
3881 return &VPBasicBlock::Recipes;
3882 }
3883
3884 /// Method to support type inquiry through isa, cast, and dyn_cast.
3885 static inline bool classof(const VPBlockBase *V) {
3886 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3887 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3888 }
3889
/// Insert \p Recipe into this block's recipe list at position \p InsertPt and
/// record this block as the recipe's parent. \p Recipe must be non-null and
/// must not already have a parent; both conditions are asserted.
3890 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3891 assert(Recipe && "No recipe to append.");
3892 assert(!Recipe->Parent && "Recipe already in VPlan");
3893 Recipe->Parent = this;
3894 Recipes.insert(InsertPt, Recipe);
3895 }
3896
3897 /// Augment the existing recipes of a VPBasicBlock with an additional
3898 /// \p Recipe as the last recipe.
3899 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3900
3901 /// The method which generates the output IR instructions that correspond to
3902 /// this VPBasicBlock, thereby "executing" the VPlan.
3903 void execute(VPTransformState *State) override;
3904
3905 /// Return the cost of this VPBasicBlock.
3906 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
3907
3908 /// Return the position of the first non-phi node recipe in the block.
3909 iterator getFirstNonPhi();
3910
3911 /// Returns an iterator range over the PHI-like recipes in the block.
3915
3916 /// Split current block at \p SplitAt by inserting a new block between the
3917 /// current block and its successors and moving all recipes starting at
3918 /// SplitAt to the new block. Returns the new block.
3919 VPBasicBlock *splitAt(iterator SplitAt);
3920
3921 VPRegionBlock *getEnclosingLoopRegion();
3922 const VPRegionBlock *getEnclosingLoopRegion() const;
3923
3924#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3925 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3926 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3927 ///
3928 /// Note that the numbering is applied to the whole VPlan, so printing
3929 /// individual blocks is consistent with the whole VPlan printing.
3930 void print(raw_ostream &O, const Twine &Indent,
3931 VPSlotTracker &SlotTracker) const override;
3932 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3933#endif
3934
3935 /// If the block has multiple successors, return the branch recipe terminating
3936 /// the block. If there is at most one successor, return nullptr.
3937 VPRecipeBase *getTerminator();
3938 const VPRecipeBase *getTerminator() const;
3939
3940 /// Returns true if the block is exiting its parent region.
3941 bool isExiting() const;
3942
3943 /// Clone the current block and its recipes, without updating the operands of
3944 /// the cloned recipes.
3945 VPBasicBlock *clone() override;
3946
3947 /// Returns the predecessor block at index \p Idx with the predecessors as per
3948 /// the corresponding plain CFG. If the block is an entry block to a region,
3949 /// the first predecessor is the single predecessor of a region, and the
3950 /// second predecessor is the exiting block of the region.
3951 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
3952
3953protected:
3954 /// Execute the recipes in the IR basic block \p BB.
3955 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3956
3957 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3958 /// generated for this VPBB.
3959 void connectToPredecessors(VPTransformState &State);
3960
3961private:
3962 /// Create an IR BasicBlock to hold the output instructions generated by this
3963 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3964 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
3965};
3966
3967inline const VPBasicBlock *
3969 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
3970}
3971
3972/// A special type of VPBasicBlock that wraps an existing IR basic block.
3973/// Recipes of the block get added before the first non-phi instruction in the
3974/// wrapped block.
3975/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3976/// preheader block.
/// NOTE(review): VPlan also stores VPIRBasicBlocks for the scalar header and
/// the exit blocks, so the restriction stated above looks outdated - confirm.
3977class VPIRBasicBlock : public VPBasicBlock {
3978 friend class VPlan;
3979
/// The IR basic block wrapped by this VPIRBasicBlock.
3980 BasicBlock *IRBB;
3981
3982 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
/// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
3983 VPIRBasicBlock(BasicBlock *IRBB)
3984 : VPBasicBlock(VPIRBasicBlockSC,
3985 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3986 IRBB(IRBB) {}
3987
3988public:
3989 ~VPIRBasicBlock() override = default;
3990
/// Method to support type inquiry through isa, cast, and dyn_cast. Only
/// blocks with ID VPIRBasicBlockSC are VPIRBasicBlocks.
3991 static inline bool classof(const VPBlockBase *V) {
3992 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3993 }
3994
3995 /// The method which generates the output IR instructions that correspond to
3996 /// this VPIRBasicBlock, thereby "executing" the VPlan.
3997 void execute(VPTransformState *State) override;
3998
/// Clone this block (covariant override of VPBasicBlock::clone). Defined out
/// of line.
3999 VPIRBasicBlock *clone() override;
4000
/// Returns the wrapped IR basic block.
4001 BasicBlock *getIRBasicBlock() const { return IRBB; }
4002};
4003
4004/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4005/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4006/// A VPRegionBlock may indicate that its contents are to be replicated several
4007/// times. This is designed to support predicated scalarization, in which a
4008/// scalar if-then code structure needs to be generated VF * UF times. Having
4009/// this replication indicator helps to keep a single model for multiple
4010/// candidate VF's. The actual replication takes place only once the desired VF
4011/// and UF have been determined.
4012class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4013 friend class VPlan;
4014
4015 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4016 VPBlockBase *Entry;
4017
4018 /// Hold the Single Exiting block of the SESE region modelled by the
4019 /// VPRegionBlock.
4020 VPBlockBase *Exiting;
4021
4022 /// An indicator whether this region is to generate multiple replicated
4023 /// instances of output IR corresponding to its VPBlockBases.
4024 bool IsReplicator;
4025
4026 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4027 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4028 const std::string &Name = "", bool IsReplicator = false)
4029 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4030 IsReplicator(IsReplicator) {
4031 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4032 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4033 Entry->setParent(this);
4034 Exiting->setParent(this);
4035 }
4036 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4037 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4038 IsReplicator(IsReplicator) {}
4039
4040public:
4041 ~VPRegionBlock() override = default;
4042
4043 /// Method to support type inquiry through isa, cast, and dyn_cast.
4044 static inline bool classof(const VPBlockBase *V) {
4045 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4046 }
4047
/// Returns the single entry block of the region; this is nullptr for a region
/// constructed without an entry until setEntry() is called.
4048 const VPBlockBase *getEntry() const { return Entry; }
4049 VPBlockBase *getEntry() { return Entry; }
4050
4051 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4052 /// EntryBlock must have no predecessors.
4053 void setEntry(VPBlockBase *EntryBlock) {
4054 assert(EntryBlock->getPredecessors().empty() &&
4055 "Entry block cannot have predecessors.");
4056 Entry = EntryBlock;
4057 EntryBlock->setParent(this);
4058 }
4059
/// Returns the single exiting block of the region; this is nullptr for a
/// region constructed without an exiting block until setExiting() is called.
4060 const VPBlockBase *getExiting() const { return Exiting; }
4061 VPBlockBase *getExiting() { return Exiting; }
4062
4063 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4064 /// ExitingBlock must have no successors.
4065 void setExiting(VPBlockBase *ExitingBlock) {
4066 assert(ExitingBlock->getSuccessors().empty() &&
4067 "Exit block cannot have successors.");
4068 Exiting = ExitingBlock;
4069 ExitingBlock->setParent(this);
4070 }
4071
4072 /// Returns the pre-header VPBasicBlock of the loop region.
4074 assert(!isReplicator() && "should only get pre-header of loop regions");
4075 return getSinglePredecessor()->getExitingBasicBlock();
4076 }
4077
4078 /// An indicator whether this region is to generate multiple replicated
4079 /// instances of output IR corresponding to its VPBlockBases.
4080 bool isReplicator() const { return IsReplicator; }
4081
4082 /// The method which generates the output IR instructions that correspond to
4083 /// this VPRegionBlock, thereby "executing" the VPlan.
4084 void execute(VPTransformState *State) override;
4085
4086 /// Return the cost of this region.
4087 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4088
4089#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4090 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4091 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4092 /// consecutive numbers.
4093 ///
4094 /// Note that the numbering is applied to the whole VPlan, so printing
4095 /// individual regions is consistent with the whole VPlan printing.
4096 void print(raw_ostream &O, const Twine &Indent,
4097 VPSlotTracker &SlotTracker) const override;
4098 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4099#endif
4100
4101 /// Clone all blocks in the single-entry single-exit region of the block and
4102 /// their recipes without updating the operands of the cloned recipes.
4103 VPRegionBlock *clone() override;
4104
4105 /// Remove the current region from its VPlan, connecting its predecessor to
4106 /// its entry, and its exiting block to its successor.
4107 void dissolveToCFGLoop();
4108
4109 /// Returns the canonical induction recipe of the region.
4111 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4112 if (EntryVPBB->empty()) {
4113 // VPlan native path. TODO: Unify both code paths.
4114 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4115 }
4116 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4117 }
4119 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4120 }
4121
4122 /// Return the type of the canonical IV for loop regions.
4123 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4124 const Type *getCanonicalIVType() const {
4125 return getCanonicalIV()->getScalarType();
4126 }
4127};
4128
4130 return getParent()->getParent();
4131}
4132
4134 return getParent()->getParent();
4135}
4136
4137/// VPlan models a candidate for vectorization, encoding various decisions taken
4138/// to produce efficient output IR, including which branches, basic-blocks and
4139/// output IR instructions to generate, and their cost. VPlan holds a
4140/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4141/// VPBasicBlock.
4142class VPlan {
4143 friend class VPlanPrinter;
4144 friend class VPSlotTracker;
4145
4146 /// VPBasicBlock corresponding to the original preheader. Used to place
4147 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4148 /// rest of VPlan execution.
4149 /// When this VPlan is used for the epilogue vector loop, the entry will be
4150 /// replaced by a new entry block created during skeleton creation.
4151 VPBasicBlock *Entry;
4152
4153 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4154 VPIRBasicBlock *ScalarHeader;
4155
4156 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4157 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4158 /// e.g. if the scalar epilogue always executes.
4160
4161 /// Holds the VFs applicable to this VPlan.
4163
4164 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4165 /// any UF.
4167
4168 /// Holds the name of the VPlan, for printing.
4169 std::string Name;
4170
4171 /// Represents the trip count of the original loop, for folding
4172 /// the tail.
4173 VPValue *TripCount = nullptr;
4174
4175 /// Represents the backedge taken count of the original loop, for folding
4176 /// the tail. It equals TripCount - 1.
4177 VPValue *BackedgeTakenCount = nullptr;
4178
4179 /// Represents the vector trip count.
4180 VPValue VectorTripCount;
4181
4182 /// Represents the vectorization factor of the loop.
4183 VPValue VF;
4184
4185 /// Represents the loop-invariant VF * UF of the vector loop region.
4186 VPValue VFxUF;
4187
4188 /// Holds a mapping between Values and their corresponding VPValue inside
4189 /// VPlan.
4190 Value2VPValueTy Value2VPValue;
4191
4192 /// Contains all the external definitions created for this VPlan. External
4193 /// definitions are VPValues that hold a pointer to their underlying IR.
4195
4196 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4197 /// VPlan is destroyed.
4198 SmallVector<VPBlockBase *> CreatedBlocks;
4199
4200 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4201 /// wrapping the original header of the scalar loop.
4202 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4203 : Entry(Entry), ScalarHeader(ScalarHeader) {
4204 Entry->setPlan(this);
4205 assert(ScalarHeader->getNumSuccessors() == 0 &&
4206 "scalar header must be a leaf node");
4207 }
4208
4209public:
4210 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4211 /// original preheader and scalar header of \p L, to be used as entry and
4212 /// scalar header blocks of the new VPlan.
4213 VPlan(Loop *L);
4214
4215 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4216 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
4217 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
4218 setEntry(createVPBasicBlock("preheader"));
4219 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4220 TripCount = TC;
4221 }
4222
4224
4226 Entry = VPBB;
4227 VPBB->setPlan(this);
4228 }
4229
4230 /// Generate the IR code for this VPlan.
4231 void execute(VPTransformState *State);
4232
4233 /// Return the cost of this plan.
4235
/// Returns the entry VPBasicBlock of the plan.
4236 VPBasicBlock *getEntry() { return Entry; }
4237 const VPBasicBlock *getEntry() const { return Entry; }
4238
4239 /// Returns the preheader of the vector loop region, if one exists, or null
4240 /// otherwise.
4242 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4243 return VectorRegion
4244 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4245 : nullptr;
4246 }
4247
4248 /// Returns the VPRegionBlock of the vector loop.
4251
4252 /// Returns the 'middle' block of the plan, that is the block that selects
4253 /// whether to execute the scalar tail loop or the exit block from the loop
4254 /// latch. If there is an early exit from the vector loop, the middle block
4255 /// conceptually has the early exit block as third successor, split across 2
4256 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4257 /// tail loop or the exit block. If the scalar tail loop or exit block are
4258 /// known to always execute, the middle block may branch directly to that
4259 /// block. This function cannot be called once the vector loop region has been
4260 /// removed.
4262 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4263 assert(
4264 LoopRegion &&
4265 "cannot call the function after vector loop region has been removed");
4266 auto *RegionSucc = cast<VPBasicBlock>(LoopRegion->getSingleSuccessor());
4267 if (RegionSucc->getSingleSuccessor() ||
4268 is_contained(RegionSucc->getSuccessors(), getScalarPreheader()))
4269 return RegionSucc;
4270 // There is an early exit. The successor of RegionSucc is the middle block.
4271 return cast<VPBasicBlock>(RegionSucc->getSuccessors()[1]);
4272 }
4273
4275 return const_cast<VPlan *>(this)->getMiddleBlock();
4276 }
4277
4278 /// Return the VPBasicBlock for the preheader of the scalar loop.
4280 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4281 }
4282
4283 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4284 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4285
4286 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4287 /// the original scalar loop.
4288 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4289
4290 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4291 /// exit block.
4293
4294 /// Returns true if \p VPBB is an exit block.
4295 bool isExitBlock(VPBlockBase *VPBB);
4296
4297 /// The trip count of the original loop.
4299 assert(TripCount && "trip count needs to be set before accessing it");
4300 return TripCount;
4301 }
4302
4303 /// Set the trip count assuming it is currently null; if it is not - use
4304 /// resetTripCount().
4305 void setTripCount(VPValue *NewTripCount) {
4306 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4307 TripCount = NewTripCount;
4308 }
4309
4310 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4311 /// the original trip count have been replaced.
4312 void resetTripCount(VPValue *NewTripCount) {
4313 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4314 "TripCount must be set when resetting");
4315 TripCount = NewTripCount;
4316 }
4317
4318 /// The backedge taken count of the original loop.
4320 if (!BackedgeTakenCount)
4321 BackedgeTakenCount = new VPValue();
4322 return BackedgeTakenCount;
4323 }
/// Const accessor for the backedge taken count. Unlike the non-const
/// overload, it never creates the VPValue, so the result is nullptr if the
/// backedge taken count has not been created yet.
4324 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4325
4326 /// The vector trip count.
4327 VPValue &getVectorTripCount() { return VectorTripCount; }
4328
4329 /// Returns the VF of the vector loop region.
4330 VPValue &getVF() { return VF; };
4331 const VPValue &getVF() const { return VF; };
4332
4333 /// Returns VF * UF of the vector loop region.
4334 VPValue &getVFxUF() { return VFxUF; }
4335
4338 }
4339
/// Add \p VF to the set of VFs applicable to this VPlan.
4340 void addVF(ElementCount VF) { VFs.insert(VF); }
4341
4343 assert(hasVF(VF) && "Cannot set VF not already in plan");
4344 VFs.clear();
4345 VFs.insert(VF);
4346 }
4347
/// Returns true if \p VF is one of the VFs applicable to this VPlan.
4348 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
/// Returns true if at least one of the VFs of this VPlan is scalable.
4349 bool hasScalableVF() const {
4350 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4351 }
4352
4353 /// Returns an iterator range over all VFs of the plan.
4356 return VFs;
4357 }
4358
/// Returns true if the plan has exactly one VF and that VF is scalar.
4359 bool hasScalarVFOnly() const {
4360 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
// A plan containing the fixed VF of 1 must not contain any other VF, so the
// result has to agree with hasVF(ElementCount::getFixed(1)).
4361 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4362 "Plan with scalar VF should only have a single VF");
4363 return HasScalarVFOnly;
4364 }
4365
/// Returns true if \p UF is applicable to this VPlan: either no UFs have been
/// recorded (the plan is then valid for any UF) or \p UF is one of them.
4366 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4367
/// Returns the UF of the plan. Asserts that exactly one UF has been set.
4368 unsigned getUF() const {
4369 assert(UFs.size() == 1 && "Expected a single UF");
4370 return UFs[0];
4371 }
4372
/// Restrict the plan to the single unroll factor \p UF, discarding any other
/// recorded UFs. \p UF must be applicable to the plan as per hasUF().
4373 void setUF(unsigned UF) {
4374 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4375 UFs.clear();
4376 UFs.insert(UF);
4377 }
4378
4379 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4380 /// concrete UF.
4381 bool isUnrolled() const { return UFs.size() == 1; }
4382
4383 /// Return a string with the name of the plan and the applicable VFs and UFs.
4384 std::string getName() const;
4385
/// Set the name of the plan to \p newName.
4386 void setName(const Twine &newName) { Name = newName.str(); }
4387
4388 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4389 /// yet) for \p V.
4391 assert(V && "Trying to get or add the VPValue of a null Value");
4392 auto [It, Inserted] = Value2VPValue.try_emplace(V);
4393 if (Inserted) {
4394 VPValue *VPV = new VPValue(V);
4395 VPLiveIns.push_back(VPV);
4396 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4397 It->second = VPV;
4398 }
4399
4400 assert(It->second->isLiveIn() && "Only live-ins should be in mapping");
4401 return It->second;
4402 }
4403
4404 /// Return a VPValue wrapping i1 true.
4405 VPValue *getTrue() { return getConstantInt(1, 1); }
4406
4407 /// Return a VPValue wrapping i1 false.
4408 VPValue *getFalse() { return getConstantInt(1, 0); }
4409
4410 /// Return a VPValue wrapping a ConstantInt with the given type and value.
4411 VPValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4412 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4413 }
4414
4415 /// Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
4417 bool IsSigned = false) {
4418 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4419 }
4420
4421 /// Return a VPValue wrapping a ConstantInt with the given APInt value.
4423 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4424 }
4425
4426 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4427 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4428
4429 /// Return the list of live-in VPValues available in the VPlan.
4431 assert(all_of(Value2VPValue,
4432 [this](const auto &P) {
4433 return is_contained(VPLiveIns, P.second);
4434 }) &&
4435 "all VPValues in Value2VPValue must also be in VPLiveIns");
4436 return VPLiveIns;
4437 }
4438
4439#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4440 /// Print the live-ins of this VPlan to \p O.
4441 void printLiveIns(raw_ostream &O) const;
4442
4443 /// Print this VPlan to \p O.
4444 void print(raw_ostream &O) const;
4445
4446 /// Print this VPlan in DOT format to \p O.
4447 void printDOT(raw_ostream &O) const;
4448
4449 /// Dump the plan to stderr (for debugging).
4450 LLVM_DUMP_METHOD void dump() const;
4451#endif
4452
4453 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4454 /// recipes to refer to the clones, and return it.
4455 VPlan *duplicate();
4456
4457 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4458 /// present. The returned block is owned by the VPlan and deleted once the
4459 /// VPlan is destroyed.
4461 VPRecipeBase *Recipe = nullptr) {
4462 auto *VPB = new VPBasicBlock(Name, Recipe);
4463 CreatedBlocks.push_back(VPB);
4464 return VPB;
4465 }
4466
4467 /// Create a new loop region with \p Name and entry and exiting blocks set
4468 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4469 /// owned by the VPlan and deleted once the VPlan is destroyed.
4470 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4471 VPBlockBase *Entry = nullptr,
4472 VPBlockBase *Exiting = nullptr) {
4473 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4474 : new VPRegionBlock(Name);
4475 CreatedBlocks.push_back(VPB);
4476 return VPB;
4477 }
4478
4479 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4480 /// returned block is owned by the VPlan and deleted once the VPlan is
4481 /// destroyed.
4483 const std::string &Name = "") {
4484 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4485 CreatedBlocks.push_back(VPB);
4486 return VPB;
4487 }
4488
4489 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4490 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4491 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4493
4494 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4495 /// instructions in \p IRBB, except its terminator which is managed by the
4496 /// successors of the block in VPlan. The returned block is owned by the VPlan
4497 /// and deleted once the VPlan is destroyed.
4499
4500 /// Returns true if the VPlan is based on a loop with an early exit. That is
4501 /// the case if the VPlan has either more than one exit block or a single exit
4502 /// block with multiple predecessors (one for the exit via the latch and one
4503 /// via the other early exit).
4504 bool hasEarlyExit() const {
4505 return count_if(ExitBlocks,
4506 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4507 1 ||
4508 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4509 }
4510
4511 /// Returns true if the scalar tail may execute after the vector loop. Note
4512 /// that this relies on unneeded branches to the scalar tail loop being
4513 /// removed.
4514 bool hasScalarTail() const {
4515 return !(!getScalarPreheader()->hasPredecessors() ||
4517 }
4518};
4519
4520#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream \p Plan to \p OS by calling VPlan::print, and return \p OS so that
/// insertions can be chained.
4521inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4522 Plan.print(OS);
4523 return OS;
4524}
4525#endif
4526
4527} // end namespace llvm
4528
4529#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
This file defines the DenseMap class.
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
const SmallVectorImpl< MachineOperand > & Cond
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition VPlan.h:495
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:610
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:162
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1078
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition ModRef.h:221
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition ModRef.h:218
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class represents the LLVM 'select' instruction.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:338
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3545
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3539
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:3824
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:3852
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:3899
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:3854
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:3851
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:3877
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:3835
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:3841
iterator end()
Definition VPlan.h:3861
iterator begin()
Recipe iterator methods.
Definition VPlan.h:3859
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:3853
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:3912
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:770
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:216
~VPBasicBlock() override
Definition VPlan.h:3845
const_reverse_iterator rbegin() const
Definition VPlan.h:3865
reverse_iterator rend()
Definition VPlan.h:3866
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:3839
VPRecipeBase & back()
Definition VPlan.h:3874
const VPRecipeBase & front() const
Definition VPlan.h:3871
const_iterator begin() const
Definition VPlan.h:3860
VPRecipeBase & front()
Definition VPlan.h:3872
const VPRecipeBase & back() const
Definition VPlan.h:3873
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:3890
bool empty() const
Definition VPlan.h:3870
const_iterator end() const
Definition VPlan.h:3862
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:3885
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:3880
reverse_iterator rbegin()
Definition VPlan.h:3864
friend class VPlan
Definition VPlan.h:3825
size_t size() const
Definition VPlan.h:3869
const_reverse_iterator rend() const
Definition VPlan.h:3867
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2441
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2446
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2436
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2457
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2472
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2423
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2418
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2452
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2432
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:80
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:299
VPRegionBlock * getParent()
Definition VPlan.h:172
VPBlocksTy & getPredecessors()
Definition VPlan.h:204
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:201
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:369
void setName(const Twine &newName)
Definition VPlan.h:165
size_t getNumSuccessors() const
Definition VPlan.h:218
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:200
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:222
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:321
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:645
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:159
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:257
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the block's successor list.
Definition VPlan.h:334
size_t getNumPredecessors() const
Definition VPlan.h:219
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:290
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:208
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the block's predecessor list.
Definition VPlan.h:327
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:203
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:157
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:180
const VPRegionBlock * getParent() const
Definition VPlan.h:173
const std::string & getName() const
Definition VPlan.h:163
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:309
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:247
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:281
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:214
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:241
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:306
friend class VPBlockUtils
Definition VPlan.h:81
unsigned getVPBlockID() const
Definition VPlan.h:170
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:348
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:313
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:149
VPBlocksTy & getSuccessors()
Definition VPlan.h:198
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:200
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:166
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:270
void setParent(VPRegionBlock *P)
Definition VPlan.h:183
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:263
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:208
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:197
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:2977
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2961
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2985
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:2958
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3480
~VPCanonicalIVPHIRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3512
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3487
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3519
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition VPlan.h:3482
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3507
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3495
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3526
This class augments a recipe with a set of VPValues defined by the recipe.
Definition VPlanValue.h:302
friend class VPValue
Definition VPlanValue.h:303
VPDef(const unsigned char SC)
Definition VPlanValue.h:382
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3684
VPValue * getStepValue() const
Definition VPlan.h:3701
Type * getScalarType() const
Definition VPlan.h:3696
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3672
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3664
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3704
VPValue * getStartValue() const
Definition VPlan.h:3700
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3656
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3594
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3575
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3581
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition VPlan.h:3587
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3570
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3456
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3461
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3447
const SCEV * getSCEV() const
Definition VPlan.h:3473
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3452
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3112
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3094
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3076
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3064
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3050
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3042
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3046
unsigned getVFScaleFactor() const
Definition VPlan.h:3106
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3044
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1994
static bool classof(const VPValue *V)
Definition VPlan.h:2004
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2035
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2040
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2024
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2032
VPValue * getStartValue() const
Definition VPlan.h:2027
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2000
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2044
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1989
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1705
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:1722
unsigned getOpcode() const
Definition VPlan.h:1718
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1699
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:3977
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:446
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4001
static bool classof(const VPBlockBase *V)
Definition VPlan.h:3991
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:3978
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:471
Class to record and manage LLVM IR flags.
Definition VPlan.h:596
FastMathFlagsTy FMFs
Definition VPlan.h:660
bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:709
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:701
WrapFlagsTy WrapFlags
Definition VPlan.h:654
CmpInst::Predicate CmpPredicate
Definition VPlan.h:653
void printFlags(raw_ostream &O) const
GEPNoWrapFlags GEPFlags
Definition VPlan.h:658
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:818
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
TruncFlagsTy TruncFlags
Definition VPlan.h:655
CmpInst::Predicate getPredicate() const
Definition VPlan.h:800
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:823
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:718
ExactFlagsTy ExactFlags
Definition VPlan.h:657
bool hasNoSignedWrap() const
Definition VPlan.h:842
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:853
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:704
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:707
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:712
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:698
bool isNonNeg() const
Definition VPlan.h:825
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:812
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:815
DisjointFlagsTy DisjointFlags
Definition VPlan.h:656
unsigned AllFlags
Definition VPlan.h:661
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:806
bool hasNoUnsignedWrap() const
Definition VPlan.h:831
NonNegFlagsTy NonNegFlags
Definition VPlan.h:659
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:728
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:763
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:715
VPIRFlags(Instruction &I)
Definition VPlan.h:667
Instruction & getInstruction() const
Definition VPlan.h:1387
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1401
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void extractLastLaneOfFirstOperand(VPBuilder &Builder)
Update the recipe's first operand to the last lane of the operand using Builder.
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1374
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1407
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1395
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1362
Helper to manage IR metadata for recipes.
Definition VPlan.h:938
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:946
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void addMetadata(unsigned Kind, MDNode *Node)
Add metadata with kind Kind and Node.
Definition VPlan.h:961
void applyMetadata(Instruction &I) const
Add all metadata to I.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1243
static bool classof(const VPUser *R)
Definition VPlan.h:1228
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1210
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, DebugLoc DL, const VPIRFlags &Flags, const VPIRMetadata &Metadata, const Twine &Name="")
Definition VPlan.h:1204
Type * getResultType() const
Definition VPlan.h:1249
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1198
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1232
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:981
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1106
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1117
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1065
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1019
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1055
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1068
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1016
@ FirstOrderRecurrenceSplice
Definition VPlan.h:987
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1059
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1011
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1008
@ VScale
Returns the value for vscale.
Definition VPlan.h:1070
@ CanonicalIVIncrementForPart
Definition VPlan.h:1001
@ CalculateTripCountMinusVF
Definition VPlan.h:999
bool hasResult() const
Definition VPlan.h:1145
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1185
unsigned getOpcode() const
Definition VPlan.h:1125
friend class VPlanSlp
Definition VPlan.h:982
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2551
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2557
static bool classof(const VPUser *U)
Definition VPlan.h:2533
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2500
Instruction * getInsertPos() const
Definition VPlan.h:2555
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2528
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2553
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2545
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2574
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2539
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2660
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2648
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2667
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2641
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2628
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2585
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2618
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2595
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2612
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2587
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned VFScaleFactor)
Definition VPlan.h:2780
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, VPValue *Cond, unsigned ScaleFactor, Instruction *ReductionInst=nullptr)
Definition VPlan.h:2784
~VPPartialReductionRecipe() override=default
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2821
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition VPlan.h:2818
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2802
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1260
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1282
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1277
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:3968
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1302
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1269
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1287
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1291
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3174
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3150
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3161
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3146
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:386
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:473
VPRegionBlock * getRegion()
Definition VPlan.h:4129
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:484
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:407
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:478
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:453
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:388
const VPBasicBlock * getParent() const
Definition VPlan.h:408
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:458
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:397
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2863
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2836
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2866
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2847
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2397
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2366
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by.
Definition VPlan.h:2380
~VPReductionPHIRecipe() override=default
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2356
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2391
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2400
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2403
void execute(VPTransformState &State) override
Generate the phi/select nodes.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2394
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition VPlan.h:2675
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:2757
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2719
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2704
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2735
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:2761
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:2763
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:2753
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:2755
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:2759
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2697
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2713
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition VPlan.h:2683
static bool classof(const VPUser *U)
Definition VPlan.h:2725
static bool classof(const VPValue *VPV)
Definition VPlan.h:2730
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4012
const VPBlockBase * getEntry() const
Definition VPlan.h:4048
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4123
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4080
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4065
VPBlockBase * getExiting()
Definition VPlan.h:4061
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4110
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4053
const Type * getCanonicalIVType() const
Definition VPlan.h:4124
const VPBlockBase * getExiting() const
Definition VPlan.h:4060
VPBlockBase * getEntry()
Definition VPlan.h:4049
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4118
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4073
friend class VPlan
Definition VPlan.h:4013
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4044
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:2878
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, VPIRMetadata Metadata={})
Definition VPlan.h:2886
bool isSingleScalar() const
Definition VPlan.h:2923
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:2935
bool isPredicated() const
Definition VPlan.h:2925
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2898
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2928
unsigned getOpcode() const
Definition VPlan.h:2952
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:2947
VPValue * getStepValue() const
Definition VPlan.h:3766
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:3754
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3725
bool isPart0() const
Return true if this VPScalarIVStepsRecipe corresponds to part 0.
Definition VPlan.h:3746
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3737
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3718
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3769
VPSingleDefRecipe is a base class for recipes modeling a sequence of one or more output IR that define ...
Definition VPlan.h:517
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:523
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:582
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:527
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:585
static bool classof(const VPUser *U)
Definition VPlan.h:574
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:519
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:926
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:199
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1421
operand_range operands()
Definition VPlanValue.h:267
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:243
unsigned getNumOperands() const
Definition VPlanValue.h:237
operand_iterator op_end()
Definition VPlanValue.h:265
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:238
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:218
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:261
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:260
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:48
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:131
friend class VPExpressionRecipe
Definition VPlanValue.h:53
Value * getLiveInIRValue() const
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition VPlanValue.h:176
friend class VPDef
Definition VPlanValue.h:49
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:85
VPValue(const unsigned char SC, Value *UV=nullptr, VPDef *Def=nullptr)
Definition VPlan.cpp:94
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:186
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition VPlanValue.h:171
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1874
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1895
const VPValue * getVFValue() const
Definition VPlan.h:1870
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1888
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:1881
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1859
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool isFirstPart() const
Return true if this VPVectorPointerRecipe corresponds to part 0.
Definition VPlan.h:1947
Type * getSourceElementType() const
Definition VPlan.h:1924
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1926
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:1933
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:1914
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:1950
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1940
const_operand_range args() const
Definition VPlan.h:1680
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1661
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1648
operand_range args()
Definition VPlan.h:1679
Function * getCalledScalarFunction() const
Definition VPlan.h:1675
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3629
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3616
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3611
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1494
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition VPlan.h:1502
Instruction::CastOps getOpcode() const
Definition VPlan.h:1545
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1510
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1548
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1522
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:1816
Type * getSourceElementType() const
Definition VPlan.h:1821
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1797
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:1824
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1808
~VPWidenGEPRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1837
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2114
static bool classof(const VPValue *V)
Definition VPlan.h:2068
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2084
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2099
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2092
PHINode * getPHINode() const
Definition VPlan.h:2094
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2056
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2080
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2097
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2106
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2063
static bool classof(const VPHeaderPHIRecipe *R)
Definition VPlan.h:2073
const VPValue * getVFValue() const
Definition VPlan.h:2087
const VPValue * getStepValue() const
Definition VPlan.h:2081
const TruncInst * getTruncInst() const
Definition VPlan.h:2192
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2167
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition VPlan.h:2143
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2159
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2191
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2134
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2208
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2187
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2200
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1578
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1613
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1622
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1569
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1628
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1595
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1625
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1616
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3198
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3195
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3238
static bool classof(const VPUser *U)
Definition VPlan.h:3232
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3261
Instruction & Ingredient
Definition VPlan.h:3186
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3221
Instruction & getIngredient() const
Definition VPlan.h:3269
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3192
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3225
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3252
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3189
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3248
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3208
void setMask(VPValue *Mask)
Definition VPlan.h:3200
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3258
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3245
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3242
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2268
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2273
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2280
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2232
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2242
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2220
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1451
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1467
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1461
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:1455
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1484
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4142
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1107
friend class VPSlotTracker
Definition VPlan.h:4144
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1083
bool hasVF(ElementCount VF) const
Definition VPlan.h:4348
LLVMContext & getContext() const
Definition VPlan.h:4336
VPBasicBlock * getEntry()
Definition VPlan.h:4236
VPValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4327
void setName(const Twine &newName)
Definition VPlan.h:4386
bool hasScalableVF() const
Definition VPlan.h:4349
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4334
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4330
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4298
VPValue * getTrue()
Return a VPValue wrapping i1 true.
Definition VPlan.h:4405
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4319
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4355
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4217
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:891
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:868
const VPValue & getVF() const
Definition VPlan.h:4331
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:899
const VPBasicBlock * getEntry() const
Definition VPlan.h:4237
friend class VPlanPrinter
Definition VPlan.h:4143
VPValue * getConstantInt(const APInt &Val)
Return a VPValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4422
unsigned getUF() const
Definition VPlan.h:4368
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4482
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1221
bool hasUF(unsigned UF) const
Definition VPlan.h:4366
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4288
VPValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4411
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4324
void setVF(ElementCount VF)
Definition VPlan.h:4342
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4381
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1012
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4504
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:994
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4274
void setTripCount(VPValue *NewTripCount)
Set the trip count, assuming it is currently null; if it is not, use resetTripCount() instead.
Definition VPlan.h:4305
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4312
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4261
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4225
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4460
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1227
VPValue * getFalse()
Return a VPValue wrapping i1 false.
Definition VPlan.h:4408
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4390
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4470
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1113
bool hasScalarVFOnly() const
Definition VPlan.h:4359
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4279
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:906
ArrayRef< VPValue * > getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4430
void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1066
void addVF(ElementCount VF)
Definition VPlan.h:4340
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4284
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4427
VPValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4416
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1028
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4241
void setUF(unsigned UF)
Definition VPlan.h:4373
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4514
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1154
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
Increasing range of size_t indices.
Definition STLExtras.h:2425
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:829
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:839
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:296
auto map_range(ContainerTy &&C, FuncTy F)
Definition STLExtras.h:364
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1954
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given predicate occurs in a range.
Definition STLExtras.h:1961
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
DenseMap< Value *, VPValue * > Value2VPValueTy
Definition VPlanValue.h:192
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:76
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing VPPhiAccessors.
Definition VPlan.h:3787
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:3808
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:3789
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:3792
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:3779
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
Struct to hold various analyses needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2311
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition VPlan.h:2306
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2329
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:626
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:631
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:621
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:614
PHINode & getIRPhi()
Definition VPlan.h:1432
VPIRPhi(PHINode &PN)
Definition VPlan.h:1425
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1427
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1443
static bool classof(const VPUser *U)
Definition VPlan.h:1320
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1335
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1350
VPPhi(ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1317
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1330
static bool classof(const VPValue *V)
Definition VPlan.h:1325
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:871
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:885
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Instruction &I)
Definition VPlan.h:876
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:880
static bool classof(const VPValue *V)
Definition VPlan.h:905
static bool classof(const VPSingleDefRecipe *U)
Definition VPlan.h:912
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe, thereby "executing" the VPlan.
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:900
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:872
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3328
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3316
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3344
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3275
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3303
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3276
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3285
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1744
VPWidenSelectRecipe(SelectInst &I, ArrayRef< VPValue * > Operands)
Definition VPlan.h:1738
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1771
VPValue * getCond() const
Definition VPlan.h:1766
unsigned getOpcode() const
Definition VPlan.h:1764
~VPWidenSelectRecipe() override=default
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3409
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3428
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3398
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3412
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.
Definition VPlan.h:3355
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3373
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3364
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3385
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3356