LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class VPlanSlp;
69class Value;
71
72struct VPCostContext;
73
74namespace Intrinsic {
75typedef unsigned ID;
76}
77
78using VPlanPtr = std::unique_ptr<VPlan>;
79
80/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
81/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
83 friend class VPBlockUtils;
84
85 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
86
87 /// An optional name for the block.
88 std::string Name;
89
90 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
91 /// it is a topmost VPBlockBase.
92 VPRegionBlock *Parent = nullptr;
93
94 /// List of predecessor blocks.
96
97 /// List of successor blocks.
99
100 /// VPlan containing the block. Can only be set on the entry block of the
101 /// plan.
102 VPlan *Plan = nullptr;
103
104 /// Add \p Successor as the last successor to this block.
105 void appendSuccessor(VPBlockBase *Successor) {
106 assert(Successor && "Cannot add nullptr successor!");
107 Successors.push_back(Successor);
108 }
109
110 /// Add \p Predecessor as the last predecessor to this block.
111 void appendPredecessor(VPBlockBase *Predecessor) {
112 assert(Predecessor && "Cannot add nullptr predecessor!");
113 Predecessors.push_back(Predecessor);
114 }
115
116 /// Remove \p Predecessor from the predecessors of this block.
117 void removePredecessor(VPBlockBase *Predecessor) {
118 auto Pos = find(Predecessors, Predecessor);
119 assert(Pos && "Predecessor does not exist");
120 Predecessors.erase(Pos);
121 }
122
123 /// Remove \p Successor from the successors of this block.
124 void removeSuccessor(VPBlockBase *Successor) {
125 auto Pos = find(Successors, Successor);
126 assert(Pos && "Successor does not exist");
127 Successors.erase(Pos);
128 }
129
130 /// This function replaces one predecessor with another, useful when
131 /// trying to replace an old block in the CFG with a new one.
132 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
133 auto I = find(Predecessors, Old);
134 assert(I != Predecessors.end());
135 assert(Old->getParent() == New->getParent() &&
136 "replaced predecessor must have the same parent");
137 *I = New;
138 }
139
140 /// This function replaces one successor with another, useful when
141 /// trying to replace an old block in the CFG with a new one.
142 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
143 auto I = find(Successors, Old);
144 assert(I != Successors.end());
145 assert(Old->getParent() == New->getParent() &&
146 "replaced successor must have the same parent");
147 *I = New;
148 }
149
150protected:
151 VPBlockBase(const unsigned char SC, const std::string &N)
152 : SubclassID(SC), Name(N) {}
153
154public:
155 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
156 /// that are actually instantiated. Values of this enumeration are kept in the
157 /// SubclassID field of the VPBlockBase objects. They are used for concrete
158 /// type identification.
159 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
160
162
163 virtual ~VPBlockBase() = default;
164
165 const std::string &getName() const { return Name; }
166
167 void setName(const Twine &newName) { Name = newName.str(); }
168
169 /// \return an ID for the concrete type of this object.
170 /// This is used to implement the classof checks. This should not be used
171 /// for any other purpose, as the values may change as LLVM evolves.
172 unsigned getVPBlockID() const { return SubclassID; }
173
174 VPRegionBlock *getParent() { return Parent; }
175 const VPRegionBlock *getParent() const { return Parent; }
176
177 /// \return A pointer to the plan containing the current block.
178 VPlan *getPlan();
179 const VPlan *getPlan() const;
180
181 /// Sets the pointer of the plan containing the block. The block must be the
182 /// entry block into the VPlan.
183 void setPlan(VPlan *ParentPlan);
184
185 void setParent(VPRegionBlock *P) { Parent = P; }
186
187 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
188 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
189 /// VPBlockBase is a VPBasicBlock, it is returned.
190 const VPBasicBlock *getEntryBasicBlock() const;
191 VPBasicBlock *getEntryBasicBlock();
192
193 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
194 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
195 /// VPBlockBase is a VPBasicBlock, it is returned.
196 const VPBasicBlock *getExitingBasicBlock() const;
197 VPBasicBlock *getExitingBasicBlock();
198
199 const VPBlocksTy &getSuccessors() const { return Successors; }
200 VPBlocksTy &getSuccessors() { return Successors; }
201
204
205 const VPBlocksTy &getPredecessors() const { return Predecessors; }
206 VPBlocksTy &getPredecessors() { return Predecessors; }
207
208 /// \return the successor of this VPBlockBase if it has a single successor.
209 /// Otherwise return a null pointer.
211 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
212 }
213
214 /// \return the predecessor of this VPBlockBase if it has a single
215 /// predecessor. Otherwise return a null pointer.
217 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
218 }
219
220 size_t getNumSuccessors() const { return Successors.size(); }
221 size_t getNumPredecessors() const { return Predecessors.size(); }
222
223 /// Returns true if this block has any predecessors.
224 bool hasPredecessors() const { return !Predecessors.empty(); }
225
226 /// An Enclosing Block of a block B is any block containing B, including B
227 /// itself. \return the closest enclosing block starting from "this", which
228 /// has successors. \return the root enclosing block if all enclosing blocks
229 /// have no successors.
230 VPBlockBase *getEnclosingBlockWithSuccessors();
231
232 /// \return the closest enclosing block starting from "this", which has
233 /// predecessors. \return the root enclosing block if all enclosing blocks
234 /// have no predecessors.
235 VPBlockBase *getEnclosingBlockWithPredecessors();
236
237 /// \return the successors either attached directly to this VPBlockBase or, if
238 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
239 /// successors of its own, search recursively for the first enclosing
240 /// VPRegionBlock that has successors and return them. If no such
241 /// VPRegionBlock exists, return the (empty) successors of the topmost
242 /// VPBlockBase reached.
244 return getEnclosingBlockWithSuccessors()->getSuccessors();
245 }
246
247 /// \return the hierarchical successor of this VPBlockBase if it has a single
248 /// hierarchical successor. Otherwise return a null pointer.
250 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
251 }
252
253 /// \return the predecessors either attached directly to this VPBlockBase or,
254 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
255 /// predecessors of its own, search recursively for the first enclosing
256 /// VPRegionBlock that has predecessors and return them. If no such
257 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
258 /// VPBlockBase reached.
260 return getEnclosingBlockWithPredecessors()->getPredecessors();
261 }
262
263 /// \return the hierarchical predecessor of this VPBlockBase if it has a
264 /// single hierarchical predecessor. Otherwise return a null pointer.
268
269 /// Set a given VPBlockBase \p Successor as the single successor of this
270 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
271 /// This VPBlockBase must have no successors.
273 assert(Successors.empty() && "Setting one successor when others exist.");
274 assert(Successor->getParent() == getParent() &&
275 "connected blocks must have the same parent");
276 appendSuccessor(Successor);
277 }
278
279 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
280 /// successors of this VPBlockBase. This VPBlockBase is not added as
281 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
282 /// successors.
283 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
284 assert(Successors.empty() && "Setting two successors when others exist.");
285 appendSuccessor(IfTrue);
286 appendSuccessor(IfFalse);
287 }
288
289 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
290 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
291 /// as successor of any VPBasicBlock in \p NewPreds.
293 assert(Predecessors.empty() && "Block predecessors already set.");
294 for (auto *Pred : NewPreds)
295 appendPredecessor(Pred);
296 }
297
298 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
299 /// This VPBlockBase must have no successors. This VPBlockBase is not added
300 /// as predecessor of any VPBasicBlock in \p NewSuccs.
302 assert(Successors.empty() && "Block successors already set.");
303 for (auto *Succ : NewSuccs)
304 appendSuccessor(Succ);
305 }
306
307 /// Remove all the predecessors of this block.
308 void clearPredecessors() { Predecessors.clear(); }
309
310 /// Remove all the successors of this block.
311 void clearSuccessors() { Successors.clear(); }
312
313 /// Swap predecessors of the block. The block must have exactly 2
314 /// predecessors.
316 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
317 std::swap(Predecessors[0], Predecessors[1]);
318 }
319
320 /// Swap successors of the block. The block must have exactly 2 successors.
321 // TODO: This should be part of introducing conditional branch recipes rather
322 // than being independent.
324 assert(Successors.size() == 2 && "must have 2 successors to swap");
325 std::swap(Successors[0], Successors[1]);
326 }
327
328 /// Returns the index for \p Pred in the blocks predecessors list.
329 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
330 assert(count(Predecessors, Pred) == 1 &&
331 "must have Pred exactly once in Predecessors");
332 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
333 }
334
335 /// Returns the index for \p Succ in the blocks successor list.
336 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
337 assert(count(Successors, Succ) == 1 &&
338 "must have Succ exactly once in Successors");
339 return std::distance(Successors.begin(), find(Successors, Succ));
340 }
341
342 /// The method which generates the output IR that correspond to this
343 /// VPBlockBase, thereby "executing" the VPlan.
344 virtual void execute(VPTransformState *State) = 0;
345
346 /// Return the cost of the block.
348
349#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
350 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
351 OS << getName();
352 }
353
354 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
355 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
356 /// consecutive numbers.
357 ///
358 /// Note that the numbering is applied to the whole VPlan, so printing
359 /// individual blocks is consistent with the whole VPlan printing.
360 virtual void print(raw_ostream &O, const Twine &Indent,
361 VPSlotTracker &SlotTracker) const = 0;
362
363 /// Print plain-text dump of this VPBlockBase to \p O.
364 void print(raw_ostream &O) const;
365
366 /// Print the successors of this block to \p O, prefixing all lines with \p
367 /// Indent.
368 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
369
370 /// Dump this VPBlockBase to dbgs().
371 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
372#endif
373
374 /// Clone the current block and its recipes without updating the operands of
375 /// the cloned recipes, including all blocks in the single-entry single-exit
376 /// region for VPRegionBlocks.
377 virtual VPBlockBase *clone() = 0;
378};
379
380/// VPRecipeBase is a base class modeling a sequence of one or more output IR
381/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
382/// and is responsible for deleting its defined values. Single-value
383 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
384/// VPRecipeBase and VPValue separately.
386 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
387 public VPDef,
388 public VPUser {
389 friend VPBasicBlock;
390 friend class VPBlockUtils;
391
392 /// Subclass identifier (for isa/dyn_cast).
393 const unsigned char SubclassID;
394
395 /// Each VPRecipe belongs to a single VPBasicBlock.
396 VPBasicBlock *Parent = nullptr;
397
398 /// The debug location for the recipe.
399 DebugLoc DL;
400
401public:
402 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
403 /// that is actually instantiated. Values of this enumeration are kept in the
404 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
405 /// type identification.
///
/// The enumerator order is significant: the phi-like IDs, and within them the
/// header-phi IDs, are deliberately contiguous so that the VPFirst*/VPLast*
/// aliases at the end can delimit them as ranges.
406 using VPRecipeTy = enum {
407 VPBranchOnMaskSC,
408 VPDerivedIVSC,
409 VPExpandSCEVSC,
410 VPExpressionSC,
411 VPIRInstructionSC,
412 VPInstructionSC,
413 VPInterleaveEVLSC,
414 VPInterleaveSC,
415 VPReductionEVLSC,
416 VPReductionSC,
417 VPReplicateSC,
418 VPScalarIVStepsSC,
419 VPVectorPointerSC,
420 VPVectorEndPointerSC,
421 VPWidenCallSC,
422 VPWidenCanonicalIVSC,
423 VPWidenCastSC,
424 VPWidenGEPSC,
425 VPWidenIntrinsicSC,
426 VPWidenLoadEVLSC,
427 VPWidenLoadSC,
428 VPWidenStoreEVLSC,
429 VPWidenStoreSC,
430 VPWidenSC,
431 VPBlendSC,
432 VPHistogramSC,
433 // START: Phi-like recipes. Need to be kept together.
434 VPWidenPHISC,
435 VPPredInstPHISC,
436 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
437 // VPHeaderPHIRecipe need to be kept together.
438 VPCanonicalIVPHISC,
439 VPCurrentIterationPHISC,
440 VPActiveLaneMaskPHISC,
441 VPFirstOrderRecurrencePHISC,
442 VPWidenIntOrFpInductionSC,
443 VPWidenPointerInductionSC,
444 VPReductionPHISC,
445 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
446 // END: Phi-like recipes
// Range markers: aliases for the first and last IDs of the phi-like group and
// of the header-phi group above. They introduce no new values; presumably
// they are used for range checks elsewhere (not visible in this chunk).
447 VPFirstPHISC = VPWidenPHISC,
448 VPFirstHeaderPHISC = VPCanonicalIVPHISC,
449 VPLastHeaderPHISC = VPReductionPHISC,
450 VPLastPHISC = VPReductionPHISC,
451 };
452
453 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
455 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
456
457 ~VPRecipeBase() override = default;
458
459 /// Clone the current recipe.
460 virtual VPRecipeBase *clone() = 0;
461
462 /// \return the VPBasicBlock which this VPRecipe belongs to.
463 VPBasicBlock *getParent() { return Parent; }
464 const VPBasicBlock *getParent() const { return Parent; }
465
466 /// \return the VPRegionBlock which the recipe belongs to.
467 VPRegionBlock *getRegion();
468 const VPRegionBlock *getRegion() const;
469
470 /// The method which generates the output IR instructions that correspond to
471 /// this VPRecipe, thereby "executing" the VPlan.
472 virtual void execute(VPTransformState &State) = 0;
473
474 /// Return the cost of this recipe, taking into account if the cost
475 /// computation should be skipped and the ForceTargetInstructionCost flag.
476 /// Also takes care of printing the cost for debugging.
478
479 /// Insert an unlinked recipe into a basic block immediately before
480 /// the specified recipe.
481 void insertBefore(VPRecipeBase *InsertPos);
482 /// Insert an unlinked recipe into \p BB immediately before the insertion
483 /// point \p IP;
484 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
485
486 /// Insert an unlinked Recipe into a basic block immediately after
487 /// the specified Recipe.
488 void insertAfter(VPRecipeBase *InsertPos);
489
490 /// Unlink this recipe from its current VPBasicBlock and insert it into
491 /// the VPBasicBlock that MovePos lives in, right after MovePos.
492 void moveAfter(VPRecipeBase *MovePos);
493
494 /// Unlink this recipe and insert into BB before I.
495 ///
496 /// \pre I is a valid iterator into BB.
497 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
498
499 /// This method unlinks 'this' from the containing basic block, but does not
500 /// delete it.
501 void removeFromParent();
502
503 /// This method unlinks 'this' from the containing basic block and deletes it.
504 ///
505 /// \returns an iterator pointing to the element after the erased one
507
508 /// \return an ID for the concrete type of this object.
509 unsigned getVPRecipeID() const { return SubclassID; }
510
511 /// Method to support type inquiry through isa, cast, and dyn_cast.
512 static inline bool classof(const VPDef *D) {
513 // All VPDefs are also VPRecipeBases.
514 return true;
515 }
516
517 static inline bool classof(const VPUser *U) { return true; }
518
519 /// Returns true if the recipe may have side-effects.
520 bool mayHaveSideEffects() const;
521
522 /// Returns true for PHI-like recipes.
523 bool isPhi() const;
524
525 /// Returns true if the recipe may read from memory.
526 bool mayReadFromMemory() const;
527
528 /// Returns true if the recipe may write to memory.
529 bool mayWriteToMemory() const;
530
531 /// Returns true if the recipe may read from or write to memory.
532 bool mayReadOrWriteMemory() const {
534 }
535
536 /// Returns the debug location of the recipe.
537 DebugLoc getDebugLoc() const { return DL; }
538
539 /// Return true if the recipe is a scalar cast.
540 bool isScalarCast() const;
541
542 /// Set the recipe's debug location to \p NewDL.
543 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
544
545#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
546 /// Dump the recipe to stderr (for debugging).
547 LLVM_ABI_FOR_TEST void dump() const;
548
549 /// Print the recipe, delegating to printRecipe().
550 void print(raw_ostream &O, const Twine &Indent,
552#endif
553
554protected:
555 /// Compute the cost of this recipe either using a recipe's specialized
556 /// implementation or using the legacy cost model and the underlying
557 /// instructions.
558 virtual InstructionCost computeCost(ElementCount VF,
559 VPCostContext &Ctx) const;
560
561#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
562 /// Each concrete VPRecipe prints itself, without printing common information,
563 /// like debug info or metadata.
564 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
565 VPSlotTracker &SlotTracker) const = 0;
566#endif
567};
568
// Helper macro that stamps out the classof() overloads shared by concrete
// recipes. Every overload reports a match exactly when the recipe reached
// from its argument has the ID \p VPRecipeID; the VPValue and VPUser overloads
// report no match when no recipe can be obtained from the argument.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *DefR = V->getDefiningRecipe())                                   \
      return DefR->getVPRecipeID() == VPRecipeID;                              \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *UserR = dyn_cast<VPRecipeBase>(U))                               \
      return UserR->getVPRecipeID() == VPRecipeID;                             \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
585
586/// VPSingleDefRecipe is a base class for recipes for modeling a sequence of
587/// one or more output IR instructions that define a single result VPValue.
588/// Note that VPRecipeBase must be inherited from before VPValue.
590public:
591 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
593 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
594
595 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
597 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
598
599 static inline bool classof(const VPRecipeBase *R) {
600 switch (R->getVPRecipeID()) {
601 case VPRecipeBase::VPDerivedIVSC:
602 case VPRecipeBase::VPExpandSCEVSC:
603 case VPRecipeBase::VPExpressionSC:
604 case VPRecipeBase::VPInstructionSC:
605 case VPRecipeBase::VPReductionEVLSC:
606 case VPRecipeBase::VPReductionSC:
607 case VPRecipeBase::VPReplicateSC:
608 case VPRecipeBase::VPScalarIVStepsSC:
609 case VPRecipeBase::VPVectorPointerSC:
610 case VPRecipeBase::VPVectorEndPointerSC:
611 case VPRecipeBase::VPWidenCallSC:
612 case VPRecipeBase::VPWidenCanonicalIVSC:
613 case VPRecipeBase::VPWidenCastSC:
614 case VPRecipeBase::VPWidenGEPSC:
615 case VPRecipeBase::VPWidenIntrinsicSC:
616 case VPRecipeBase::VPWidenSC:
617 case VPRecipeBase::VPBlendSC:
618 case VPRecipeBase::VPPredInstPHISC:
619 case VPRecipeBase::VPCanonicalIVPHISC:
620 case VPRecipeBase::VPCurrentIterationPHISC:
621 case VPRecipeBase::VPActiveLaneMaskPHISC:
622 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
623 case VPRecipeBase::VPWidenPHISC:
624 case VPRecipeBase::VPWidenIntOrFpInductionSC:
625 case VPRecipeBase::VPWidenPointerInductionSC:
626 case VPRecipeBase::VPReductionPHISC:
627 return true;
628 case VPRecipeBase::VPBranchOnMaskSC:
629 case VPRecipeBase::VPInterleaveEVLSC:
630 case VPRecipeBase::VPInterleaveSC:
631 case VPRecipeBase::VPIRInstructionSC:
632 case VPRecipeBase::VPWidenLoadEVLSC:
633 case VPRecipeBase::VPWidenLoadSC:
634 case VPRecipeBase::VPWidenStoreEVLSC:
635 case VPRecipeBase::VPWidenStoreSC:
636 case VPRecipeBase::VPHistogramSC:
637 // TODO: Widened stores don't define a value, but widened loads do. Split
638 // the recipes to be able to make widened loads VPSingleDefRecipes.
639 return false;
640 }
641 llvm_unreachable("Unhandled VPRecipeID");
642 }
643
644 static inline bool classof(const VPValue *V) {
645 auto *R = V->getDefiningRecipe();
646 return R && classof(R);
647 }
648
649 static inline bool classof(const VPUser *U) {
650 auto *R = dyn_cast<VPRecipeBase>(U);
651 return R && classof(R);
652 }
653
654 VPSingleDefRecipe *clone() override = 0;
655
656 /// Returns the underlying instruction.
663
664#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
665 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
667#endif
668};
669
670/// Class to record and manage LLVM IR flags.
/// Tag recording which kind of IR flags this object carries. The constructors
/// set it together with the matching member of the flags union, and the
/// accessors check it before reading that member.
672 enum class OperationType : unsigned char {
// Compare that is not an FCmpInst; carries only a predicate.
673 Cmp,
// Floating-point compare; carries a predicate plus fast-math flags.
674 FCmp,
// OverflowingBinaryOperator; carries the NUW/NSW wrap flags.
675 OverflowingBinOp,
// TruncInst; carries its own NUW/NSW flags.
676 Trunc,
// PossiblyDisjointInst; carries the disjoint flag.
677 DisjointOp,
// PossiblyExactOperator; carries the exact flag.
678 PossiblyExactOp,
// GetElementPtrInst; carries the raw GEP no-wrap flags.
679 GEPOp,
// FPMathOperator (other than FCmp); carries fast-math flags.
680 FPMathOp,
// PossiblyNonNegInst; carries the non-negative flag.
681 NonNegOp,
// Reduction; carries the recurrence kind, ordered/in-loop bits and
// fast-math flags.
682 ReductionOp,
// No flags tracked; this is what the default constructor selects.
683 Other
684 };
685
686public:
687 struct WrapFlagsTy {
688 char HasNUW : 1;
689 char HasNSW : 1;
690
692 };
693
695 char HasNUW : 1;
696 char HasNSW : 1;
697
699 };
700
705
707 char NonNeg : 1;
708 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
709 };
710
711private:
712 struct ExactFlagsTy {
713 char IsExact : 1;
714 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
715 };
716 struct FastMathFlagsTy {
717 char AllowReassoc : 1;
718 char NoNaNs : 1;
719 char NoInfs : 1;
720 char NoSignedZeros : 1;
721 char AllowReciprocal : 1;
722 char AllowContract : 1;
723 char ApproxFunc : 1;
724
725 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
726 };
727 /// Holds both the predicate and fast-math flags for floating-point
728 /// comparisons.
729 struct FCmpFlagsTy {
730 uint8_t CmpPredStorage;
731 FastMathFlagsTy FMFs;
732 };
733 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
734 struct ReductionFlagsTy {
735 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
736 // additional kinds.
737 unsigned char Kind : 6;
738 // TODO: Derive order/in-loop from plan and remove here.
739 unsigned char IsOrdered : 1;
740 unsigned char IsInLoop : 1;
741 FastMathFlagsTy FMFs;
742
743 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
744 FastMathFlags FMFs)
745 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
746 IsInLoop(IsInLoop), FMFs(FMFs) {}
747 };
748
749 OperationType OpType;
750
751 union {
756 ExactFlagsTy ExactFlags;
759 FastMathFlagsTy FMFs;
760 FCmpFlagsTy FCmpFlags;
761 ReductionFlagsTy ReductionFlags;
763 };
764
765public:
766 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
767
769 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
770 OpType = OperationType::FCmp;
772 FCmp->getPredicate());
773 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
774 FCmpFlags.FMFs = FCmp->getFastMathFlags();
775 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
776 OpType = OperationType::Cmp;
778 Op->getPredicate());
779 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
780 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
781 OpType = OperationType::DisjointOp;
782 DisjointFlags.IsDisjoint = Op->isDisjoint();
783 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
784 OpType = OperationType::OverflowingBinOp;
785 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
786 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
787 OpType = OperationType::Trunc;
788 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
789 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
790 OpType = OperationType::PossiblyExactOp;
791 ExactFlags.IsExact = Op->isExact();
792 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
793 OpType = OperationType::GEPOp;
794 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
795 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
796 "wrap flags truncated");
797 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
798 OpType = OperationType::NonNegOp;
799 NonNegFlags.NonNeg = PNNI->hasNonNeg();
800 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
801 OpType = OperationType::FPMathOp;
802 FMFs = Op->getFastMathFlags();
803 }
804 }
805
806 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
808 assert(getPredicate() == Pred && "predicate truncated");
809 }
810
812 : OpType(OperationType::FCmp), AllFlags() {
814 assert(getPredicate() == Pred && "predicate truncated");
815 FCmpFlags.FMFs = FMFs;
816 }
817
819 : OpType(OperationType::OverflowingBinOp), AllFlags() {
820 this->WrapFlags = WrapFlags;
821 }
822
824 : OpType(OperationType::Trunc), AllFlags() {
825 this->TruncFlags = TruncFlags;
826 }
827
828 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
829 this->FMFs = FMFs;
830 }
831
833 : OpType(OperationType::DisjointOp), AllFlags() {
834 this->DisjointFlags = DisjointFlags;
835 }
836
838 : OpType(OperationType::NonNegOp), AllFlags() {
839 this->NonNegFlags = NonNegFlags;
840 }
841
842 VPIRFlags(ExactFlagsTy ExactFlags)
843 : OpType(OperationType::PossiblyExactOp), AllFlags() {
844 this->ExactFlags = ExactFlags;
845 }
846
848 : OpType(OperationType::GEPOp), AllFlags() {
849 GEPFlagsStorage = GEPFlags.getRaw();
850 }
851
852 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
853 : OpType(OperationType::ReductionOp), AllFlags() {
854 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
855 }
856
858 OpType = Other.OpType;
859 AllFlags[0] = Other.AllFlags[0];
860 AllFlags[1] = Other.AllFlags[1];
861 }
862
863 /// Only keep flags also present in \p Other. \p Other must have the same
864 /// OpType as the current object.
865 void intersectFlags(const VPIRFlags &Other);
866
867 /// Drop all poison-generating flags.
869 // NOTE: This needs to be kept in-sync with
870 // Instruction::dropPoisonGeneratingFlags.
871 switch (OpType) {
872 case OperationType::OverflowingBinOp:
873 WrapFlags.HasNUW = false;
874 WrapFlags.HasNSW = false;
875 break;
876 case OperationType::Trunc:
877 TruncFlags.HasNUW = false;
878 TruncFlags.HasNSW = false;
879 break;
880 case OperationType::DisjointOp:
881 DisjointFlags.IsDisjoint = false;
882 break;
883 case OperationType::PossiblyExactOp:
884 ExactFlags.IsExact = false;
885 break;
886 case OperationType::GEPOp:
887 GEPFlagsStorage = 0;
888 break;
889 case OperationType::FPMathOp:
890 case OperationType::FCmp:
891 case OperationType::ReductionOp:
892 getFMFsRef().NoNaNs = false;
893 getFMFsRef().NoInfs = false;
894 break;
895 case OperationType::NonNegOp:
896 NonNegFlags.NonNeg = false;
897 break;
898 case OperationType::Cmp:
899 case OperationType::Other:
900 break;
901 }
902 }
903
904 /// Apply the IR flags to \p I.
905 void applyFlags(Instruction &I) const {
906 switch (OpType) {
907 case OperationType::OverflowingBinOp:
908 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
909 I.setHasNoSignedWrap(WrapFlags.HasNSW);
910 break;
911 case OperationType::Trunc:
912 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
913 I.setHasNoSignedWrap(TruncFlags.HasNSW);
914 break;
915 case OperationType::DisjointOp:
916 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
917 break;
918 case OperationType::PossiblyExactOp:
919 I.setIsExact(ExactFlags.IsExact);
920 break;
921 case OperationType::GEPOp:
922 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
924 break;
925 case OperationType::FPMathOp:
926 case OperationType::FCmp: {
927 const FastMathFlagsTy &F = getFMFsRef();
928 I.setHasAllowReassoc(F.AllowReassoc);
929 I.setHasNoNaNs(F.NoNaNs);
930 I.setHasNoInfs(F.NoInfs);
931 I.setHasNoSignedZeros(F.NoSignedZeros);
932 I.setHasAllowReciprocal(F.AllowReciprocal);
933 I.setHasAllowContract(F.AllowContract);
934 I.setHasApproxFunc(F.ApproxFunc);
935 break;
936 }
937 case OperationType::NonNegOp:
938 I.setNonNeg(NonNegFlags.NonNeg);
939 break;
940 case OperationType::ReductionOp:
941 llvm_unreachable("reduction ops should not use applyFlags");
942 case OperationType::Cmp:
943 case OperationType::Other:
944 break;
945 }
946 }
947
949 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
950 "recipe doesn't have a compare predicate");
951 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
954 }
955
957 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
958 "recipe doesn't have a compare predicate");
959 if (OpType == OperationType::FCmp)
961 else
963 assert(getPredicate() == Pred && "predicate truncated");
964 }
965
969
970 /// Returns true if the recipe has a comparison predicate.
971 bool hasPredicate() const {
972 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
973 }
974
975 /// Returns true if the recipe has fast-math flags.
976 bool hasFastMathFlags() const {
977 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
978 OpType == OperationType::ReductionOp;
979 }
980
982
983 /// Returns true if the recipe has non-negative flag.
984 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
985
/// Returns the stored non-negative flag. Asserts that the recipe's operation
/// type is NonNegOp.
986 bool isNonNeg() const {
987 assert(OpType == OperationType::NonNegOp &&
988 "recipe doesn't have a NNEG flag");
989 return NonNegFlags.NonNeg;
990 }
991
992 bool hasNoUnsignedWrap() const {
993 switch (OpType) {
994 case OperationType::OverflowingBinOp:
995 return WrapFlags.HasNUW;
996 case OperationType::Trunc:
997 return TruncFlags.HasNUW;
998 default:
999 llvm_unreachable("recipe doesn't have a NUW flag");
1000 }
1001 }
1002
1003 bool hasNoSignedWrap() const {
1004 switch (OpType) {
1005 case OperationType::OverflowingBinOp:
1006 return WrapFlags.HasNSW;
1007 case OperationType::Trunc:
1008 return TruncFlags.HasNSW;
1009 default:
1010 llvm_unreachable("recipe doesn't have a NSW flag");
1011 }
1012 }
1013
1014 bool isDisjoint() const {
1015 assert(OpType == OperationType::DisjointOp &&
1016 "recipe cannot have a disjoing flag");
1017 return DisjointFlags.IsDisjoint;
1018 }
1019
1021 assert(OpType == OperationType::ReductionOp &&
1022 "recipe doesn't have reduction flags");
1023 return static_cast<RecurKind>(ReductionFlags.Kind);
1024 }
1025
/// Returns the IsOrdered bit of the reduction flags. Asserts that the recipe's
/// operation type is ReductionOp.
1026 bool isReductionOrdered() const {
1027 assert(OpType == OperationType::ReductionOp &&
1028 "recipe doesn't have reduction flags");
1029 return ReductionFlags.IsOrdered;
1030 }
1031
/// Returns the IsInLoop bit of the reduction flags. Asserts that the recipe's
/// operation type is ReductionOp.
1032 bool isReductionInLoop() const {
1033 assert(OpType == OperationType::ReductionOp &&
1034 "recipe doesn't have reduction flags");
1035 return ReductionFlags.IsInLoop;
1036 }
1037
1038private:
1039 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1040 FastMathFlagsTy &getFMFsRef() {
1041 if (OpType == OperationType::FCmp)
1042 return FCmpFlags.FMFs;
1043 if (OpType == OperationType::ReductionOp)
1044 return ReductionFlags.FMFs;
1045 return FMFs;
1046 }
1047 const FastMathFlagsTy &getFMFsRef() const {
1048 if (OpType == OperationType::FCmp)
1049 return FCmpFlags.FMFs;
1050 if (OpType == OperationType::ReductionOp)
1051 return ReductionFlags.FMFs;
1052 return FMFs;
1053 }
1054
1055public:
1056 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1057 /// otherwise. Opcodes not supporting default flags include compares and
1058 /// ComputeReductionResult.
1059 static VPIRFlags getDefaultFlags(unsigned Opcode);
1060
1061#if !defined(NDEBUG)
1062 /// Returns true if the set flags are valid for \p Opcode.
1063 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1064
1065 /// Returns true if \p Opcode has its required flags set.
1066 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1067#endif
1068
1069#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1070 void printFlags(raw_ostream &O) const;
1071#endif
1072};
1073
1074static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1075
1076/// A pure-virtual common base class for recipes defining a single VPValue and
1077/// using IR flags.
1079 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1080 const VPIRFlags &Flags,
1082 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1083
1084 static inline bool classof(const VPRecipeBase *R) {
1085 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1086 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1087 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1088 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1089 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1090 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1091 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1092 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1093 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1094 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1095 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1096 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1097 }
1098
1099 static inline bool classof(const VPUser *U) {
1100 auto *R = dyn_cast<VPRecipeBase>(U);
1101 return R && classof(R);
1102 }
1103
1104 static inline bool classof(const VPValue *V) {
1105 auto *R = V->getDefiningRecipe();
1106 return R && classof(R);
1107 }
1108
1110
1111 static inline bool classof(const VPSingleDefRecipe *R) {
1112 return classof(static_cast<const VPRecipeBase *>(R));
1113 }
1114
1115 void execute(VPTransformState &State) override = 0;
1116
1117 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1119 VPCostContext &Ctx) const;
1120};
1121
1122/// Helper to access the operand that contains the unroll part for this recipe
1123/// after unrolling.
1124template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
// NOTE(review): PartOpIdx is presumably the index of the operand that holds
// the unroll part; both accessors are only declared here, so confirm against
// their out-of-line definitions.
1125protected:
1126 /// Return the VPValue operand containing the unroll part or null if there is
1127 /// no such operand.
1128 VPValue *getUnrollPartOperand(const VPUser &U) const;
1129
1130 /// Return the unroll part.
1131 unsigned getUnrollPart(const VPUser &U) const;
1132};
1133
1134/// Helper to manage IR metadata for recipes. It filters out metadata that
1135/// cannot be propagated.
1138
1139public:
1140 VPIRMetadata() = default;
1141
1142 /// Adds metadata that can be preserved from the original instruction
1143 /// \p I.
1145
1146 /// Copy constructor for cloning.
1148
1150
1151 /// Add all metadata to \p I.
1152 void applyMetadata(Instruction &I) const;
1153
1154 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1155 /// already exists, it will be replaced. Otherwise, it will be added.
1156 void setMetadata(unsigned Kind, MDNode *Node) {
1157 auto It =
1158 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1159 return P.first == Kind;
1160 });
1161 if (It != Metadata.end())
1162 It->second = Node;
1163 else
1164 Metadata.emplace_back(Kind, Node);
1165 }
1166
1167 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1168 /// nodes that are common to both.
1169 void intersect(const VPIRMetadata &MD);
1170
1171 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1172 MDNode *getMetadata(unsigned Kind) const {
1173 auto It =
1174 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1175 return It != Metadata.end() ? It->second : nullptr;
1176 }
1177
1178#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1179 /// Print metadata with node IDs.
1180 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1181#endif
1182};
1183
1184/// This is a concrete Recipe that models a single VPlan-level instruction.
1185/// While as any Recipe it may generate a sequence of IR instructions when
1186/// executed, these instructions would always form a single-def expression as
1187/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1188/// opcodes can take an optional mask. Masks may be assigned during
1189/// predication.
1191 public VPIRMetadata {
1192 friend class VPlanSlp;
1193
1194public:
1195 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1196 enum {
1198 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1199 // values of a first-order recurrence.
1203 // Creates a mask where each lane is active (true) whilst the current
1204 // counter (first operand + index) is less than the second operand. i.e.
1205 // mask[i] = icmp ult (op0 + i), op1
1206 // The size of the mask returned is VF * Multiplier (UF, third op).
1210 // Increment the canonical IV separately for each unrolled part.
1212 // Abstract instruction that compares two values and branches. This is
1213 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1216 // Branch with 2 boolean condition operands and 3 successors. If condition
1217 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1218 // successor 1; otherwise branches to successor 2. Expanded after region
1219 // dissolution into: (1) an OR of the two conditions branching to
1220 // middle.split or successor 2, and (2) middle.split branching to successor
1221 // 0 or successor 1 based on condition 0.
1224 /// Given operands of (the same) struct type, creates a struct of fixed-
1225 /// width vectors each containing a struct field of all operands. The
1226 /// number of operands matches the element count of every vector.
1228 /// Creates a fixed-width vector containing all operands. The number of
1229 /// operands matches the vector element count.
1231 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1232 /// abstract VPInstruction whose single defined VPValue represents VF
1233 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1234 /// VPInstructions.
1236 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1237 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1240 // Extracts the last part of its operand. Removed during unrolling.
1242 // Extracts the last lane of its vector operand, per part.
1244 // Extracts the second-to-last lane from its operand or the second-to-last
1245 // part if it is scalar. In the latter case, the recipe will be removed
1246 // during unrolling.
1248 LogicalAnd, // Non-poison propagating logical And.
1249 LogicalOr, // Non-poison propagating logical Or.
1250 // Add an offset in bytes (second operand) to a base pointer (first
1251 // operand). Only generates scalar values (either for the first lane only or
1252 // for all lanes, depending on its uses).
1254 // Add a vector offset in bytes (second operand) to a scalar base pointer
1255 // (first operand).
1257 // Returns a scalar boolean value, which is true if any lane of its
1258 // (boolean) vector operands is true. It produces the reduced value across
1259 // all unrolled iterations. Unrolling will add all copies of its original
1260 // operand as additional operands. AnyOf is poison-safe as all operands
1261 // will be frozen.
1263 // Calculates the first active lane index of the vector predicate operands.
1264 // It produces the lane index across all unrolled iterations. Unrolling will
1265 // add all copies of its original operand as additional operands.
1266 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1267 // result even with operands that are all zeroes.
1269 // Calculates the last active lane index of the vector predicate operands.
1270 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1271 // tail-folding to extract the correct live-out value from the last active
1272 // iteration. It produces the lane index across all unrolled iterations.
1273 // Unrolling will add all copies of its original operand as additional
1274 // operands.
1276 // Returns a reversed vector for the operand.
1278
1279 // The opcodes below are used for VPInstructionWithType.
1280 //
1281 /// Scale the first operand (vector step) by the second operand
1282 /// (scalar-step). Casts both operands to the result type if needed.
1284 /// Start vector for reductions with 3 operands: the original start value,
1285 /// the identity value for the reduction and an integer indicating the
1286 /// scaling factor.
1288 // Creates a step vector starting from 0 to VF with a step of 1.
1290 /// Extracts a single lane (first operand) from a set of vector operands.
1291 /// The lane specifies an index into a vector formed by combining all vector
1292 /// operands (all operands after the first one).
1294 /// Explicit user for the resume phi of the canonical induction in the main
1295 /// VPlan, used by the epilogue vector loop.
1297 /// Extracts the lane from the first operand corresponding to the last
1298 /// active (non-zero) lane in the mask (second operand), or if no lanes
1299 /// were active in the mask, returns the default value (third operand).
1301
1302 /// Returns the value for vscale.
1304 /// Compute the exiting value of a wide induction after vectorization, that
1305 /// is the value of the last lane of the induction increment (i.e. its
1306 /// backedge value). Has the wide induction recipe as operand.
1310 };
1311
1312 /// Returns true if this VPInstruction generates scalar values for all lanes.
1313 /// Most VPInstructions generate a single value per part, either vector or
1314 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1315 /// values per all lanes, stemming from an original ingredient. This method
1316 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1317 /// underlying ingredient.
1318 bool doesGeneratePerAllLanes() const;
1319
1320 /// Return the number of operands determined by the opcode of the
1321 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1322 /// cannot be determined directly by the opcode.
1323 unsigned getNumOperandsForOpcode() const;
1324
1325private:
1326 typedef unsigned char OpcodeTy;
1327 OpcodeTy Opcode;
1328
1329 /// An optional name that can be used for the generated IR instruction.
1330 std::string Name;
1331
1332 /// Returns true if we can generate a scalar for the first lane only if
1333 /// needed.
1334 bool canGenerateScalarForFirstLane() const;
1335
1336 /// Utility methods serving execute(): generates a single vector instance of
1337 /// the modeled instruction. \returns the generated value. In some cases an
1338 /// existing value is returned rather than a generated one.
1339 Value *generate(VPTransformState &State);
1340
1341 /// Returns true if the VPInstruction does not need masking.
1342 bool alwaysUnmasked() const {
1343 if (Opcode == VPInstruction::MaskedCond)
1344 return false;
1345
1346 // For now only VPInstructions with underlying values use masks.
1347 // TODO: provide masks to VPInstructions w/o underlying values.
1348 if (!getUnderlyingValue())
1349 return true;
1350
1351 return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
1352 }
1353
1354public:
1355 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1356 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1357 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1358
1359 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1360
1361 VPInstruction *clone() override {
1362 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1363 getDebugLoc(), Name);
1364 if (getUnderlyingValue())
1365 New->setUnderlyingValue(getUnderlyingInstr());
1366 return New;
1367 }
1368
1369 unsigned getOpcode() const { return Opcode; }
1370
1371 /// Generate the instruction.
1372 /// TODO: We currently execute only per-part unless a specific instance is
1373 /// provided.
1374 void execute(VPTransformState &State) override;
1375
1376 /// Return the cost of this VPInstruction.
1377 InstructionCost computeCost(ElementCount VF,
1378 VPCostContext &Ctx) const override;
1379
1380#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1381 /// Print the VPInstruction to dbgs() (for debugging).
1382 LLVM_DUMP_METHOD void dump() const;
1383#endif
1384
1385 bool hasResult() const {
1386 // CallInst may or may not have a result, depending on the called function.
1387 // Conservatively assume calls have results for now.
1388 switch (getOpcode()) {
1389 case Instruction::Ret:
1390 case Instruction::Br:
1391 case Instruction::Store:
1392 case Instruction::Switch:
1393 case Instruction::IndirectBr:
1394 case Instruction::Resume:
1395 case Instruction::CatchRet:
1396 case Instruction::Unreachable:
1397 case Instruction::Fence:
1398 case Instruction::AtomicRMW:
1402 return false;
1403 default:
1404 return true;
1405 }
1406 }
1407
1408 /// Returns true if the VPInstruction has a mask operand.
1409 bool isMasked() const {
1410 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1411 // VPInstructions without a fixed number of operands cannot be masked.
1412 if (NumOpsForOpcode == -1u)
1413 return false;
1414 return NumOpsForOpcode + 1 == getNumOperands();
1415 }
1416
1417 /// Returns the number of operands, excluding the mask if the VPInstruction is
1418 /// masked.
1419 unsigned getNumOperandsWithoutMask() const {
1420 return getNumOperands() - isMasked();
1421 }
1422
1423 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1424 void addMask(VPValue *Mask) {
1425 assert(!isMasked() && "recipe is already masked");
1426 if (alwaysUnmasked())
1427 return;
1428 addOperand(Mask);
1429 }
1430
1431 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1432 /// VPInstructions.
1433 VPValue *getMask() const {
1434 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1435 }
1436
1437 /// Returns an iterator range over the operands excluding the mask operand
1438 /// if present.
1445
1446 /// Returns true if the underlying opcode may read from or write to memory.
1447 bool opcodeMayReadOrWriteFromMemory() const;
1448
1449 /// Returns true if the recipe only uses the first lane of operand \p Op.
1450 bool usesFirstLaneOnly(const VPValue *Op) const override;
1451
1452 /// Returns true if the recipe only uses the first part of operand \p Op.
1453 bool usesFirstPartOnly(const VPValue *Op) const override;
1454
1455 /// Returns true if this VPInstruction produces a scalar value from a vector,
1456 /// e.g. by performing a reduction or extracting a lane.
1457 bool isVectorToScalar() const;
1458
1459 /// Returns true if this VPInstruction's operands are single scalars and the
1460 /// result is also a single scalar.
1461 bool isSingleScalar() const;
1462
1463 /// Returns the symbolic name assigned to the VPInstruction.
1464 StringRef getName() const { return Name; }
1465
1466 /// Set the symbolic name for the VPInstruction.
1467 void setName(StringRef NewName) { Name = NewName.str(); }
1468
1469protected:
1470#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1471 /// Print the VPInstruction to \p O.
1472 void printRecipe(raw_ostream &O, const Twine &Indent,
1473 VPSlotTracker &SlotTracker) const override;
1474#endif
1475};
1476
1477/// A specialization of VPInstruction augmenting it with a dedicated result
1478/// type, to be used when the opcode and operands of the VPInstruction don't
1479/// directly determine the result type. Note that there is no separate recipe ID
1480/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1481/// distinguished purely by the opcode.
1483 /// Scalar result type produced by the recipe.
1484 Type *ResultTy;
1485
1486public:
1488 Type *ResultTy, const VPIRFlags &Flags = {},
1489 const VPIRMetadata &Metadata = {},
1491 const Twine &Name = "")
1492 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1493 ResultTy(ResultTy) {}
1494
1495 static inline bool classof(const VPRecipeBase *R) {
1496 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1497 // type information.
1498 if (R->isScalarCast())
1499 return true;
1500 auto *VPI = dyn_cast<VPInstruction>(R);
1501 if (!VPI)
1502 return false;
1503 switch (VPI->getOpcode()) {
1507 case Instruction::Load:
1508 return true;
1509 default:
1510 return false;
1511 }
1512 }
1513
1514 static inline bool classof(const VPUser *R) {
1516 }
1517
1518 VPInstruction *clone() override {
1519 auto *New =
1521 *this, *this, getDebugLoc(), getName());
1522 New->setUnderlyingValue(getUnderlyingValue());
1523 return New;
1524 }
1525
1526 void execute(VPTransformState &State) override;
1527
1528 /// Return the cost of this VPInstruction.
1530 VPCostContext &Ctx) const override {
1531 // TODO: Compute accurate cost after retiring the legacy cost model.
1532 return 0;
1533 }
1534
1535 Type *getResultType() const { return ResultTy; }
1536
1537protected:
1538#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1539 /// Print the recipe.
1540 void printRecipe(raw_ostream &O, const Twine &Indent,
1541 VPSlotTracker &SlotTracker) const override;
1542#endif
1543};
1544
1545/// Helper type to provide functions to access incoming values and blocks for
1546/// phi-like recipes.
1548protected:
1549 /// Return a VPRecipeBase* to the current object.
1550 virtual const VPRecipeBase *getAsRecipe() const = 0;
1551
1552public:
1553 virtual ~VPPhiAccessors() = default;
1554
1555 /// Returns the incoming VPValue with index \p Idx.
1556 VPValue *getIncomingValue(unsigned Idx) const {
1557 return getAsRecipe()->getOperand(Idx);
1558 }
1559
1560 /// Returns the incoming block with index \p Idx.
1561 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1562
1563 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1564 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1565
1566 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1567 /// block.
1568 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1569
1570 /// Returns the number of incoming values, also number of incoming blocks.
1571 virtual unsigned getNumIncoming() const {
1572 return getAsRecipe()->getNumOperands();
1573 }
1574
1575 /// Returns an iterator range over the incoming values.
1577 return make_range(getAsRecipe()->op_begin(),
1578 getAsRecipe()->op_begin() + getNumIncoming());
1579 }
1580
1582 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1583
1584 /// Returns an iterator range over the incoming blocks.
1586 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1587 return getIncomingBlock(Idx);
1588 };
1589 return map_range(index_range(0, getNumIncoming()), GetBlock);
1590 }
1591
1592 /// Returns an iterator range over pairs of incoming values and corresponding
1593 /// incoming blocks.
1599
1600 /// Removes the incoming value for \p IncomingBlock, which must be a
1601 /// predecessor.
1602 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1603
1604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1605 /// Print the recipe.
1607#endif
1608};
1609
1612 const Twine &Name = "")
1613 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1614
1615 static inline bool classof(const VPUser *U) {
1616 auto *VPI = dyn_cast<VPInstruction>(U);
1617 return VPI && VPI->getOpcode() == Instruction::PHI;
1618 }
1619
1620 static inline bool classof(const VPValue *V) {
1621 auto *VPI = dyn_cast<VPInstruction>(V);
1622 return VPI && VPI->getOpcode() == Instruction::PHI;
1623 }
1624
1625 static inline bool classof(const VPSingleDefRecipe *SDR) {
1626 auto *VPI = dyn_cast<VPInstruction>(SDR);
1627 return VPI && VPI->getOpcode() == Instruction::PHI;
1628 }
1629
1630 VPPhi *clone() override {
1631 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1632 PhiR->setUnderlyingValue(getUnderlyingValue());
1633 return PhiR;
1634 }
1635
1636 void execute(VPTransformState &State) override;
1637
1638protected:
1639#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1640 /// Print the recipe.
1641 void printRecipe(raw_ostream &O, const Twine &Indent,
1642 VPSlotTracker &SlotTracker) const override;
1643#endif
1644
1645 const VPRecipeBase *getAsRecipe() const override { return this; }
1646};
1647
1648 /// A recipe to wrap an original IR instruction not to be modified during
1649 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1650 /// Except for PHIs, VPIRInstructions cannot have any operands.
1652 Instruction &I;
1653
1654protected:
1655 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1656 /// subclasses may need to be created, e.g. VPIRPhi.
1658 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1659
1660public:
1661 ~VPIRInstruction() override = default;
1662
1663 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1664 /// VPIRInstruction.
1666
1667 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1668
1670 auto *R = create(I);
1671 for (auto *Op : operands())
1672 R->addOperand(Op);
1673 return R;
1674 }
1675
1676 void execute(VPTransformState &State) override;
1677
1678 /// Return the cost of this VPIRInstruction.
1680 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1681
1682 Instruction &getInstruction() const { return I; }
1683
1684 bool usesScalars(const VPValue *Op) const override {
1686 "Op must be an operand of the recipe");
1687 return true;
1688 }
1689
1690 bool usesFirstPartOnly(const VPValue *Op) const override {
1692 "Op must be an operand of the recipe");
1693 return true;
1694 }
1695
1696 bool usesFirstLaneOnly(const VPValue *Op) const override {
1698 "Op must be an operand of the recipe");
1699 return true;
1700 }
1701
1702protected:
1703#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1704 /// Print the recipe.
1705 void printRecipe(raw_ostream &O, const Twine &Indent,
1706 VPSlotTracker &SlotTracker) const override;
1707#endif
1708};
1709
1710/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1711/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1712/// allowed, and it is used to add a new incoming value for the single
1713/// predecessor VPBB.
1715 public VPPhiAccessors {
1717
1718 static inline bool classof(const VPRecipeBase *U) {
1719 auto *R = dyn_cast<VPIRInstruction>(U);
1720 return R && isa<PHINode>(R->getInstruction());
1721 }
1722
1724
1725 void execute(VPTransformState &State) override;
1726
1727protected:
1728#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1729 /// Print the recipe.
1730 void printRecipe(raw_ostream &O, const Twine &Indent,
1731 VPSlotTracker &SlotTracker) const override;
1732#endif
1733
1734 const VPRecipeBase *getAsRecipe() const override { return this; }
1735};
1736
1737/// VPWidenRecipe is a recipe for producing a widened instruction using the
1738/// opcode and operands of the recipe. This recipe covers most of the
1739/// traditional vectorization cases where each recipe transforms into a
1740/// vectorized version of itself.
1742 public VPIRMetadata {
1743 unsigned Opcode;
1744
1745public:
1747 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1748 DebugLoc DL = {})
1749 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1750 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1751 setUnderlyingValue(&I);
1752 }
1753
1754 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1755 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1756 DebugLoc DL = {})
1757 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1758 VPIRMetadata(Metadata), Opcode(Opcode) {}
1759
1760 ~VPWidenRecipe() override = default;
1761
1762 VPWidenRecipe *clone() override {
1763 if (auto *UV = getUnderlyingValue())
1764 return new VPWidenRecipe(*cast<Instruction>(UV), operands(), *this, *this,
1765 getDebugLoc());
1766 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1767 }
1768
1769 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1770
1771 /// Produce a widened instruction using the opcode and operands of the recipe,
1772 /// processing State.VF elements.
1773 void execute(VPTransformState &State) override;
1774
1775 /// Return the cost of this VPWidenRecipe.
1776 InstructionCost computeCost(ElementCount VF,
1777 VPCostContext &Ctx) const override;
1778
1779 unsigned getOpcode() const { return Opcode; }
1780
1781protected:
1782#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1783 /// Print the recipe.
1784 void printRecipe(raw_ostream &O, const Twine &Indent,
1785 VPSlotTracker &SlotTracker) const override;
1786#endif
1787
1788 /// Returns true if the recipe only uses the first lane of operand \p Op.
1789 bool usesFirstLaneOnly(const VPValue *Op) const override {
1791 "Op must be an operand of the recipe");
1792 return Opcode == Instruction::Select && Op == getOperand(0) &&
1793 Op->isDefinedOutsideLoopRegions();
1794 }
1795};
1796
1797/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1799 /// Cast instruction opcode.
1800 Instruction::CastOps Opcode;
1801
1802 /// Result type for the cast.
1803 Type *ResultTy;
1804
1805public:
1807 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1808 const VPIRMetadata &Metadata = {},
1810 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1811 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1812 assert(flagsValidForOpcode(Opcode) &&
1813 "Set flags not supported for the provided opcode");
1815 "Opcode requires specific flags to be set");
1817 }
1818
1819 ~VPWidenCastRecipe() override = default;
1820
1822 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1824 *this, *this, getDebugLoc());
1825 }
1826
1827 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1828
1829 /// Produce widened copies of the cast.
1830 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1831
1832 /// Return the cost of this VPWidenCastRecipe.
1834 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1835
1836 Instruction::CastOps getOpcode() const { return Opcode; }
1837
1838 /// Returns the result type of the cast.
1839 Type *getResultType() const { return ResultTy; }
1840
1841protected:
1842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1843 /// Print the recipe.
1844 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1845 VPSlotTracker &SlotTracker) const override;
1846#endif
1847};
1848
1849/// A recipe for widening vector intrinsics.
1851 /// ID of the vector intrinsic to widen.
1852 Intrinsic::ID VectorIntrinsicID;
1853
1854 /// Scalar return type of the intrinsic.
1855 Type *ResultTy;
1856
1857 /// True if the intrinsic may read from memory.
1858 bool MayReadFromMemory;
1859
1860 /// True if the intrinsic may write to memory.
1861 bool MayWriteToMemory;
1862
1863 /// True if the intrinsic may have side-effects.
1864 bool MayHaveSideEffects;
1865
1866public:
1868 ArrayRef<VPValue *> CallArguments, Type *Ty,
1869 const VPIRFlags &Flags = {},
1870 const VPIRMetadata &MD = {},
1872 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1873 Flags, DL),
1874 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1875 MayReadFromMemory(CI.mayReadFromMemory()),
1876 MayWriteToMemory(CI.mayWriteToMemory()),
1877 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1878 setUnderlyingValue(&CI);
1879 }
1880
1882 ArrayRef<VPValue *> CallArguments, Type *Ty,
1883 const VPIRFlags &Flags = {},
1884 const VPIRMetadata &Metadata = {},
1886 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1887 Flags, DL),
1888 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1889 ResultTy(Ty) {
1890 LLVMContext &Ctx = Ty->getContext();
1891 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1892 MemoryEffects ME = Attrs.getMemoryEffects();
1893 MayReadFromMemory = !ME.onlyWritesMemory();
1894 MayWriteToMemory = !ME.onlyReadsMemory();
1895 MayHaveSideEffects = MayWriteToMemory ||
1896 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1897 !Attrs.hasAttribute(Attribute::WillReturn);
1898 }
1899
1900 ~VPWidenIntrinsicRecipe() override = default;
1901
1903 if (Value *CI = getUnderlyingValue())
1904 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1905 operands(), ResultTy, *this, *this,
1906 getDebugLoc());
1907 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1908 *this, *this, getDebugLoc());
1909 }
1910
1911 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1912
1913 /// Produce a widened version of the vector intrinsic.
1914 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1915
1916 /// Return the cost of this vector intrinsic.
1918 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1919
1920 /// Return the ID of the intrinsic.
1921 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1922
1923 /// Return the scalar return type of the intrinsic.
1924 Type *getResultType() const { return ResultTy; }
1925
1926 /// Return the name of the intrinsic as a string.
1928
1929 /// Returns true if the intrinsic may read from memory.
1930 bool mayReadFromMemory() const { return MayReadFromMemory; }
1931
1932 /// Returns true if the intrinsic may write to memory.
1933 bool mayWriteToMemory() const { return MayWriteToMemory; }
1934
1935 /// Returns true if the intrinsic may have side-effects.
1936 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1937
1938 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1939
1940protected:
1941#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1942 /// Print the recipe.
1943 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1944 VPSlotTracker &SlotTracker) const override;
1945#endif
1946};
1947
1948/// A recipe for widening Call instructions using library calls.
1950 public VPIRMetadata {
1951 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1952 /// between a given VF and the chosen vectorized variant, so there will be a
1953 /// different VPlan for each VF with a valid variant.
1954 Function *Variant;
1955
1956public:
1958 ArrayRef<VPValue *> CallArguments,
1959 const VPIRFlags &Flags = {},
1960 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1961 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
1962 DL),
1963 VPIRMetadata(Metadata), Variant(Variant) {
1964 setUnderlyingValue(UV);
1965 assert(
1966 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1967 "last operand must be the called function");
1968 }
1969
1970 ~VPWidenCallRecipe() override = default;
1971
1973 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1974 *this, *this, getDebugLoc());
1975 }
1976
1977 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
1978
1979 /// Produce a widened version of the call instruction.
1980 void execute(VPTransformState &State) override;
1981
1982 /// Return the cost of this VPWidenCallRecipe.
1983 InstructionCost computeCost(ElementCount VF,
1984 VPCostContext &Ctx) const override;
1985
1989
1992
1993protected:
1994#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1995 /// Print the recipe.
1996 void printRecipe(raw_ostream &O, const Twine &Indent,
1997 VPSlotTracker &SlotTracker) const override;
1998#endif
1999};
2000
2001/// A recipe representing a sequence of load -> update -> store as part of
2002/// a histogram operation. This means there may be aliasing between vector
2003/// lanes, which is handled by the llvm.experimental.vector.histogram family
2004/// of intrinsics. The only update operations currently supported are
2005/// 'add' and 'sub' where the other term is loop-invariant.
2007 /// Opcode of the update operation, currently either add or sub.
2008 unsigned Opcode;
2009
2010public:
2011 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2013 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2014 Opcode(Opcode) {}
2015
2016 ~VPHistogramRecipe() override = default;
2017
2019 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2020 }
2021
2022 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2023
2024 /// Produce a vectorized histogram operation.
2025 void execute(VPTransformState &State) override;
2026
2027 /// Return the cost of this VPHistogramRecipe.
2029 VPCostContext &Ctx) const override;
2030
2031 unsigned getOpcode() const { return Opcode; }
2032
2033 /// Return the mask operand if one was provided, or a null pointer if all
2034 /// lanes should be executed unconditionally.
2035 VPValue *getMask() const {
2036 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2037 }
2038
2039protected:
2040#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2041 /// Print the recipe.
2042 void printRecipe(raw_ostream &O, const Twine &Indent,
2043 VPSlotTracker &SlotTracker) const override;
2044#endif
2045};
2046
2047/// A recipe for handling GEP instructions.
2049 Type *SourceElementTy;
2050
2051 bool isPointerLoopInvariant() const {
2052 return getOperand(0)->isDefinedOutsideLoopRegions();
2053 }
2054
2055 bool isIndexLoopInvariant(unsigned I) const {
2056 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2057 }
2058
2059public:
2061 const VPIRFlags &Flags = {},
2063 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2064 SourceElementTy(GEP->getSourceElementType()) {
2065 setUnderlyingValue(GEP);
2067 (void)Metadata;
2069 assert(Metadata.empty() && "unexpected metadata on GEP");
2070 }
2071
2072 ~VPWidenGEPRecipe() override = default;
2073
2076 operands(), *this, getDebugLoc());
2077 }
2078
2079 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2080
2081 /// This recipe generates a GEP instruction.
2082 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2083
2084 /// Generate the gep nodes.
2085 void execute(VPTransformState &State) override;
2086
2087 Type *getSourceElementType() const { return SourceElementTy; }
2088
2089 /// Return the cost of this VPWidenGEPRecipe.
2091 VPCostContext &Ctx) const override {
2092 // TODO: Compute accurate cost after retiring the legacy cost model.
2093 return 0;
2094 }
2095
2096 /// Returns true if the recipe only uses the first lane of operand \p Op.
2097 bool usesFirstLaneOnly(const VPValue *Op) const override;
2098
2099protected:
2100#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2101 /// Print the recipe.
2102 void printRecipe(raw_ostream &O, const Twine &Indent,
2103 VPSlotTracker &SlotTracker) const override;
2104#endif
2105};
2106
2107/// A recipe to compute a pointer to the last element of each part of a widened
2108/// memory access for widened memory accesses of SourceElementTy. Used for
2109/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2110/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2111/// unroller otherwise.
2113 Type *SourceElementTy;
2114
2115 /// The constant stride of the pointer computed by this recipe, expressed in
2116 /// units of SourceElementTy.
2117 int64_t Stride;
2118
2119public:
2120 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2121 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2122 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2123 GEPFlags, DL),
2124 SourceElementTy(SourceElementTy), Stride(Stride) {
2125 assert(Stride < 0 && "Stride must be negative");
2126 }
2127
2128 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2129
2130 Type *getSourceElementType() const { return SourceElementTy; }
2131 int64_t getStride() const { return Stride; }
2132 VPValue *getPointer() const { return getOperand(0); }
2133 VPValue *getVFValue() const { return getOperand(1); }
2135 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2136 }
2137
2138 /// Adds the offset operand to the recipe.
2139 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2140 void materializeOffset(unsigned Part = 0);
2141
2142 void execute(VPTransformState &State) override;
2143
2144 bool usesFirstLaneOnly(const VPValue *Op) const override {
2146 "Op must be an operand of the recipe");
2147 return true;
2148 }
2149
2150 /// Return the cost of this VPVectorEndPointerRecipe. Currently always 0.
2152 VPCostContext &Ctx) const override {
2153 // TODO: Compute accurate cost after retiring the legacy cost model.
2154 return 0;
2155 }
2156
2157 /// Returns true if the recipe only uses the first part of operand \p Op.
// NOTE(review): this recipe can carry a third (offset) operand, see the
// getNumOperands() == 3 check and the addOperand(Offset) call in this class.
// The operand-count assert below would reject that state; confirm this
// query is only made before the offset operand has been added.
2158 bool usesFirstPartOnly(const VPValue *Op) const override {
2160 "Op must be an operand of the recipe");
2161 assert(getNumOperands() <= 2 && "must have at most two operands");
2162 return true;
2163 }
2164
2166 auto *VEPR = new VPVectorEndPointerRecipe(
2169 if (auto *Offset = getOffset())
2170 VEPR->addOperand(Offset);
2171 return VEPR;
2172 }
2173
2174protected:
2175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2176 /// Print the recipe.
2177 void printRecipe(raw_ostream &O, const Twine &Indent,
2178 VPSlotTracker &SlotTracker) const override;
2179#endif
2180};
2181
2182/// A recipe to compute the pointers for widened memory accesses of \p
2183/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2184/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2186 Type *SourceElementTy;
2187
2188public:
2189 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2190 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2191 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2192 SourceElementTy(SourceElementTy) {}
2193
2194 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2195
2197 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2198 }
2199
2200 void execute(VPTransformState &State) override;
2201
2202 Type *getSourceElementType() const { return SourceElementTy; }
2203
2204 bool usesFirstLaneOnly(const VPValue *Op) const override {
2206 "Op must be an operand of the recipe");
2207 return true;
2208 }
2209
2210 /// Returns true if the recipe only uses the first part of operand \p Op.
2211 bool usesFirstPartOnly(const VPValue *Op) const override {
2213 "Op must be an operand of the recipe");
2214 assert(getNumOperands() <= 2 && "must have at most two operands");
2215 return true;
2216 }
2217
2219 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2221 if (auto *Off = getOffset())
2222 Clone->addOperand(Off);
2223 return Clone;
2224 }
2225
2226 /// Return the cost of this VPVectorPointerRecipe. Currently always 0.
2228 VPCostContext &Ctx) const override {
2229 // TODO: Compute accurate cost after retiring the legacy cost model.
2230 return 0;
2231 }
2232
2233protected:
2234#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2235 /// Print the recipe.
2236 void printRecipe(raw_ostream &O, const Twine &Indent,
2237 VPSlotTracker &SlotTracker) const override;
2238#endif
2239};
2240
2241/// A pure virtual base class for all recipes modeling header phis, including
2242/// phis for first order recurrences, pointer inductions and reductions. The
2243/// start value is the first operand of the recipe and the incoming value from
2244/// the backedge is the second operand.
2245///
2246/// Inductions are modeled using the following sub-classes:
2247/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2248/// starting at a specified value (zero for the main vector loop, the resume
2249/// value for the epilogue vector loop) and stepping by 1. The induction
2250/// controls exiting of the vector loop by comparing against the vector trip
2251/// count. Produces a single scalar PHI for the induction value per
2252/// iteration.
2253/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2254/// floating point inductions with arbitrary start and step values. Produces
2255/// a vector PHI per-part.
2256/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2257/// pointer induction. Produces either a vector PHI per-part or scalar values
2258/// per-lane based on the canonical induction.
2259/// * VPFirstOrderRecurrencePHIRecipe
2260/// * VPReductionPHIRecipe
2261/// * VPActiveLaneMaskPHIRecipe
2262/// * VPEVLBasedIVPHIRecipe
2264 public VPPhiAccessors {
2265protected:
2266 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2267 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2268 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2269
2270 const VPRecipeBase *getAsRecipe() const override { return this; }
2271
2272public:
2273 ~VPHeaderPHIRecipe() override = default;
2274
2275 /// Method to support type inquiry through isa, cast, and dyn_cast.
2276 static inline bool classof(const VPRecipeBase *R) {
2277 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2278 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2279 }
2280 static inline bool classof(const VPValue *V) {
2281 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2282 }
2283 static inline bool classof(const VPSingleDefRecipe *R) {
2284 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2285 }
2286
2287 /// Generate the phi nodes.
2288 void execute(VPTransformState &State) override = 0;
2289
2290 /// Return the cost of this header phi recipe.
2292 VPCostContext &Ctx) const override;
2293
2294 /// Returns the start value of the phi, if one is set.
2296 return getNumOperands() == 0 ? nullptr : getOperand(0);
2297 }
2299 return getNumOperands() == 0 ? nullptr : getOperand(0);
2300 }
2301
2302 /// Update the start value of the recipe.
2304
2305 /// Returns the incoming value from the loop backedge.
2307 return getOperand(1);
2308 }
2309
2310 /// Update the incoming value from the loop backedge.
2312
2313 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2314 /// to be a recipe.
2316 return *getBackedgeValue()->getDefiningRecipe();
2317 }
2318
2319protected:
2320#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2321 /// Print the recipe.
2322 void printRecipe(raw_ostream &O, const Twine &Indent,
2323 VPSlotTracker &SlotTracker) const override = 0;
2324#endif
2325};
2326
2327/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2328/// VPWidenPointerInductionRecipe), providing shared functionality, including
2329/// retrieving the step value, induction descriptor and original phi node.
2331 const InductionDescriptor &IndDesc;
2332
2333public:
2334 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2335 VPValue *Step, const InductionDescriptor &IndDesc,
2336 DebugLoc DL)
2337 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2338 addOperand(Step);
2339 }
2340
2341 static inline bool classof(const VPRecipeBase *R) {
2342 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2343 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2344 }
2345
2346 static inline bool classof(const VPValue *V) {
2347 auto *R = V->getDefiningRecipe();
2348 return R && classof(R);
2349 }
2350
2351 static inline bool classof(const VPSingleDefRecipe *R) {
2352 return classof(static_cast<const VPRecipeBase *>(R));
2353 }
2354
2355 void execute(VPTransformState &State) override = 0;
2356
2357 /// Returns the start value of the induction.
2359
2360 /// Returns the step value of the induction.
2362 const VPValue *getStepValue() const { return getOperand(1); }
2363
2364 /// Update the step value of the recipe.
2365 void setStepValue(VPValue *V) { setOperand(1, V); }
2366
2368 const VPValue *getVFValue() const { return getOperand(2); }
2369
2370 /// Returns the number of incoming values, also number of incoming blocks.
2371 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2372 /// incoming value, its start value.
2373 unsigned getNumIncoming() const override { return 1; }
2374
2375 /// Returns the underlying PHINode if one exists, or null otherwise.
2379
2380 /// Returns the induction descriptor for the recipe.
2381 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2382
2384 // TODO: All operands of base recipe must exist and be at same index in
2385 // derived recipe.
2387 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2388 }
2389
2391 // TODO: All operands of base recipe must exist and be at same index in
2392 // derived recipe.
2394 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2395 }
2396
2397 /// Returns true if the recipe only uses the first lane of operand \p Op.
2398 bool usesFirstLaneOnly(const VPValue *Op) const override {
2400 "Op must be an operand of the recipe");
2401 // The recipe creates its own wide start value, so it only requests the
2402 // first lane of the operand.
2403 // TODO: Remove once creating the start value is modeled separately.
2404 return Op == getStartValue() || Op == getStepValue();
2405 }
2406};
2407
2408/// A recipe for handling phi nodes of integer and floating-point inductions,
2409/// producing their vector values. This is an abstract recipe and must be
2410/// converted to concrete recipes before executing.
2412 public VPIRFlags {
2413 TruncInst *Trunc;
2414
2415 // If this recipe is unrolled it will have 2 additional operands.
2416 bool isUnrolled() const { return getNumOperands() == 5; }
2417
2418public:
2420 VPValue *VF, const InductionDescriptor &IndDesc,
2421 const VPIRFlags &Flags, DebugLoc DL)
2422 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2423 Start, Step, IndDesc, DL),
2424 VPIRFlags(Flags), Trunc(nullptr) {
2425 addOperand(VF);
2426 }
2427
2429 VPValue *VF, const InductionDescriptor &IndDesc,
2430 TruncInst *Trunc, const VPIRFlags &Flags,
2431 DebugLoc DL)
2432 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2433 Start, Step, IndDesc, DL),
2434 VPIRFlags(Flags), Trunc(Trunc) {
2435 addOperand(VF);
2437 (void)Metadata;
2438 if (Trunc)
2440 assert(Metadata.empty() && "unexpected metadata on Trunc");
2441 }
2442
2444
2450
2451 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2452
2453 void execute(VPTransformState &State) override {
2454 llvm_unreachable("cannot execute this recipe, should be expanded via "
2455 "expandVPWidenIntOrFpInductionRecipe");
2456 }
2457
2458 /// Returns the start value of the induction.
2460
2461 /// If the recipe has been unrolled, return the VPValue for the induction
2462 /// increment, otherwise return null.
2464 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2465 }
2466
2467 /// Returns the number of incoming values, also number of incoming blocks.
2468 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2469 /// incoming value, its start value.
2470 unsigned getNumIncoming() const override { return 1; }
2471
2472 /// Returns the first defined value as TruncInst, if it is one or nullptr
2473 /// otherwise.
2474 TruncInst *getTruncInst() { return Trunc; }
2475 const TruncInst *getTruncInst() const { return Trunc; }
2476
2477 /// Returns true if the induction is canonical, i.e. starting at 0 and
2478 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2479 /// same type as the canonical induction.
2480 bool isCanonical() const;
2481
2482 /// Returns the scalar type of the induction.
2484 return Trunc ? Trunc->getType() : getStartValue()->getType();
2485 }
2486
2487 /// Returns the VPValue representing the value of this induction at
2488 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2489 /// take place.
2491 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2492 }
2493
2494protected:
2495#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2496 /// Print the recipe.
2497 void printRecipe(raw_ostream &O, const Twine &Indent,
2498 VPSlotTracker &SlotTracker) const override;
2499#endif
2500};
2501
2503public:
2504 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2505 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2506 /// VF*UF.
2508 VPValue *NumUnrolledElems,
2509 const InductionDescriptor &IndDesc, DebugLoc DL)
2510 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2511 Start, Step, IndDesc, DL) {
2512 addOperand(NumUnrolledElems);
2513 }
2514
2516
2522
2523 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2524
2525 /// Generate vector values for the pointer induction.
2526 void execute(VPTransformState &State) override {
2527 llvm_unreachable("cannot execute this recipe, should be expanded via "
2528 "expandVPWidenPointerInduction");
2529 };
2530
2531 /// Returns true if only scalar values will be generated.
2532 bool onlyScalarsGenerated(bool IsScalable);
2533
2534protected:
2535#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2536 /// Print the recipe.
2537 void printRecipe(raw_ostream &O, const Twine &Indent,
2538 VPSlotTracker &SlotTracker) const override;
2539#endif
2540};
2541
2542/// A recipe for widened phis. Incoming values are operands of the recipe and
2543/// their operand index corresponds to the incoming predecessor block. If the
2544/// recipe is placed in an entry block to a (non-replicate) region, it must have
2545/// exactly 2 incoming values, the first from the predecessor of the region and
2546/// the second from the exiting block of the region.
2548 public VPPhiAccessors {
2549 /// Name to use for the generated IR instruction for the widened phi.
2550 std::string Name;
2551
2552public:
2553 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2554 /// debug location \p DL.
2555 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2556 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2557 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, {}, Phi, DL),
2558 Name(Name.str()) {
2559 if (Start)
2560 addOperand(Start);
2561 }
2562
2565 getOperand(0), getDebugLoc(), Name);
2567 C->addOperand(Op);
2568 return C;
2569 }
2570
2571 ~VPWidenPHIRecipe() override = default;
2572
2573 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2574
2575 /// Generate the phi/select nodes.
2576 void execute(VPTransformState &State) override;
2577
2578 /// Return the cost of this VPWidenPHIRecipe.
2580 VPCostContext &Ctx) const override;
2581
2582protected:
2583#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2584 /// Print the recipe.
2585 void printRecipe(raw_ostream &O, const Twine &Indent,
2586 VPSlotTracker &SlotTracker) const override;
2587#endif
2588
2589 const VPRecipeBase *getAsRecipe() const override { return this; }
2590};
2591
2592/// A recipe for handling first-order recurrence phis. The start value is the
2593/// first operand of the recipe and the incoming value from the backedge is the
2594/// second operand.
2597 VPValue &BackedgeValue)
2598 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2599 &Start) {
2600 addOperand(&BackedgeValue);
2601 }
2602
2603 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2604
2609
2610 void execute(VPTransformState &State) override;
2611
2612 /// Return the cost of this first-order recurrence phi recipe.
2614 VPCostContext &Ctx) const override;
2615
2616 /// Returns true if the recipe only uses the first lane of operand \p Op.
2617 bool usesFirstLaneOnly(const VPValue *Op) const override {
2619 "Op must be an operand of the recipe");
2620 return Op == getStartValue();
2621 }
2622
2623protected:
2624#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2625 /// Print the recipe.
2626 void printRecipe(raw_ostream &O, const Twine &Indent,
2627 VPSlotTracker &SlotTracker) const override;
2628#endif
2629};
2630
2631/// Possible variants of a reduction.
2632
2633/// This reduction is ordered and in-loop.
2634struct RdxOrdered {};
2635/// This reduction is in-loop.
2636struct RdxInLoop {};
2637/// This reduction is unordered with the partial result scaled down by some
2638/// factor.
2641};
2642using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2643
/// Build the ReductionStyle matching the given properties: RdxOrdered if
/// \p Ordered is set, otherwise RdxInLoop if \p InLoop is set, otherwise
/// RdxUnordered carrying \p ScaleFactor as its VF scale factor. \p Ordered
/// requires \p InLoop (asserted). \p ScaleFactor is only used for the
/// unordered style.
2644 inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2645 unsigned ScaleFactor) {
2646 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2647 if (Ordered)
2648 return RdxOrdered{};
2649 if (InLoop)
2650 return RdxInLoop{};
2651 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2652 }
2653
2654/// A recipe for handling reduction phis. The start value is the first operand
2655/// of the recipe and the incoming value from the backedge is the second
2656/// operand.
2658 /// The recurrence kind of the reduction.
2659 const RecurKind Kind;
2660
2661 ReductionStyle Style;
2662
2663 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2664 /// patterns for argmin/argmax).
2665 /// TODO: Also support cases where the phi itself has a single use, but its
2666 /// compare has multiple uses.
2667 bool HasUsesOutsideReductionChain;
2668
2669public:
2670 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2672 VPValue &BackedgeValue, ReductionStyle Style,
2673 const VPIRFlags &Flags,
2674 bool HasUsesOutsideReductionChain = false)
2675 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2676 VPIRFlags(Flags), Kind(Kind), Style(Style),
2677 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2678 addOperand(&BackedgeValue);
2679 }
2680
2681 ~VPReductionPHIRecipe() override = default;
2682
2684 return new VPReductionPHIRecipe(
2686 *getOperand(0), *getBackedgeValue(), Style, *this,
2687 HasUsesOutsideReductionChain);
2688 }
2689
2690 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2691
2692 /// Generate the phi/select nodes.
2693 void execute(VPTransformState &State) override;
2694
2695 /// Get the factor that the VF of this recipe's output should be scaled by, or
2696 /// 1 if it isn't scaled.
2697 unsigned getVFScaleFactor() const {
2698 auto *Partial = std::get_if<RdxUnordered>(&Style);
2699 return Partial ? Partial->VFScaleFactor : 1;
2700 }
2701
2702 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2703 /// > 1.
2704 void setVFScaleFactor(unsigned ScaleFactor) {
2705 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2706 Style = RdxUnordered{ScaleFactor};
2707 }
2708
2709 /// Returns the number of incoming values, also number of incoming blocks.
2710 /// A reduction phi has two incoming values: the start value and the
2711 /// incoming value from the loop backedge.
2712 unsigned getNumIncoming() const override { return 2; }
2713
2714 /// Returns the recurrence kind of the reduction.
2715 RecurKind getRecurrenceKind() const { return Kind; }
2716
2717 /// Returns true, if the phi is part of an ordered reduction.
2718 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2719
2720 /// Returns true if the phi is part of an in-loop reduction.
2721 bool isInLoop() const {
2722 return std::holds_alternative<RdxInLoop>(Style) ||
2723 std::holds_alternative<RdxOrdered>(Style);
2724 }
2725
2726 /// Returns true if the reduction outputs a vector with a scaled down VF.
2727 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2728
2729 /// Returns true, if the phi is part of a multi-use reduction.
2731 return HasUsesOutsideReductionChain;
2732 }
2733
2734 /// Returns true if the recipe only uses the first lane of operand \p Op.
2735 bool usesFirstLaneOnly(const VPValue *Op) const override {
2737 "Op must be an operand of the recipe");
2738 return isOrdered() || isInLoop();
2739 }
2740
2741protected:
2742#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2743 /// Print the recipe.
2744 void printRecipe(raw_ostream &O, const Twine &Indent,
2745 VPSlotTracker &SlotTracker) const override;
2746#endif
2747};
2748
2749/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2750/// instructions.
2752public:
2753 /// The blend operation is a User of the incoming values and of their
2754 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2755 /// be omitted (implied by passing an odd number of operands) in which case
2756 /// all other incoming values are merged into it.
2758 const VPIRFlags &Flags, DebugLoc DL)
2759 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2760 assert(Operands.size() >= 2 && "Expected at least two operands!");
2761 setUnderlyingValue(Phi);
2762 }
2763
2764 VPBlendRecipe *clone() override {
2766 operands(), *this, getDebugLoc());
2767 }
2768
2769 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2770
2771 /// A normalized blend is one that has an odd number of operands, whereby the
2772 /// first operand does not have an associated mask.
2773 bool isNormalized() const { return getNumOperands() % 2; }
2774
2775 /// Return the number of incoming values, taking into account when normalized
2776 /// the first incoming value will have no mask.
2777 unsigned getNumIncomingValues() const {
2778 return (getNumOperands() + isNormalized()) / 2;
2779 }
2780
2781 /// Return incoming value number \p Idx.
2782 VPValue *getIncomingValue(unsigned Idx) const {
2783 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2784 }
2785
2786 /// Return mask number \p Idx.
2787 VPValue *getMask(unsigned Idx) const {
2788 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2789 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2790 }
2791
2792 /// Set mask number \p Idx to \p V.
2793 void setMask(unsigned Idx, VPValue *V) {
2794 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2795 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2796 }
2797
2798 void execute(VPTransformState &State) override {
2799 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2800 }
2801
2802 /// Return the cost of this VPBlendRecipe.
2803 InstructionCost computeCost(ElementCount VF,
2804 VPCostContext &Ctx) const override;
2805
2806 /// Returns true if the recipe only uses the first lane of operand \p Op.
2807 bool usesFirstLaneOnly(const VPValue *Op) const override {
2809 "Op must be an operand of the recipe");
2810 // Recursing through Blend recipes only, must terminate at header phi's the
2811 // latest.
2812 return all_of(users(),
2813 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2814 }
2815
2816protected:
2817#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2818 /// Print the recipe.
2819 void printRecipe(raw_ostream &O, const Twine &Indent,
2820 VPSlotTracker &SlotTracker) const override;
2821#endif
2822};
2823
2824/// A common base class for interleaved memory operations.
2825/// An Interleaved memory operation is a memory access method that combines
2826/// multiple strided loads/stores into a single wide load/store with shuffles.
2827/// The first operand is the start address. The optional operands are, in order,
2828/// the stored values and the mask.
2830 public VPIRMetadata {
2832
2833 /// Indicates if the interleave group is in a conditional block and requires a
2834 /// mask.
2835 bool HasMask = false;
2836
2837 /// Indicates if gaps between members of the group need to be masked out or if
2838 /// unused gaps can be loaded speculatively.
2839 bool NeedsMaskForGaps = false;
2840
2841protected:
2842 VPInterleaveBase(const unsigned char SC,
2844 ArrayRef<VPValue *> Operands,
2845 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2846 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2847 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2848 NeedsMaskForGaps(NeedsMaskForGaps) {
2849 // TODO: extend the masked interleaved-group support to reversed access.
2850 assert((!Mask || !IG->isReverse()) &&
2851 "Reversed masked interleave-group not supported.");
2852 if (StoredValues.empty()) {
2853 for (unsigned I = 0; I < IG->getFactor(); ++I)
2854 if (Instruction *Inst = IG->getMember(I)) {
2855 assert(!Inst->getType()->isVoidTy() && "must have result");
2856 new VPRecipeValue(this, Inst);
2857 }
2858 } else {
2859 for (auto *SV : StoredValues)
2860 addOperand(SV);
2861 }
2862 if (Mask) {
2863 HasMask = true;
2864 addOperand(Mask);
2865 }
2866 }
2867
2868public:
2869 VPInterleaveBase *clone() override = 0;
2870
2871 static inline bool classof(const VPRecipeBase *R) {
2872 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2873 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2874 }
2875
2876 static inline bool classof(const VPUser *U) {
2877 auto *R = dyn_cast<VPRecipeBase>(U);
2878 return R && classof(R);
2879 }
2880
2881 /// Return the address accessed by this recipe.
2882 VPValue *getAddr() const {
2883 return getOperand(0); // Address is the 1st, mandatory operand.
2884 }
2885
2886 /// Return the mask used by this recipe. Note that a full mask is represented
2887 /// by a nullptr.
2888 VPValue *getMask() const {
2889 // Mask is optional and the last operand.
2890 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2891 }
2892
2893 /// Return true if the access needs a mask because of the gaps.
2894 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2895
2897
2898 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2899
/// Not meant to be reached: the base implementation unconditionally hits
/// llvm_unreachable.
2900 void execute(VPTransformState &State) override {
2901 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2902 }
2903
2904 /// Return the cost of this recipe.
2905 InstructionCost computeCost(ElementCount VF,
2906 VPCostContext &Ctx) const override;
2907
2908 /// Returns true if the recipe only uses the first lane of operand \p Op.
2909 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2910
2911 /// Returns the number of stored operands of this interleave group. Returns 0
2912 /// for load interleave groups.
2913 virtual unsigned getNumStoreOperands() const = 0;
2914
2915 /// Return the VPValues stored by this interleave group. If it is a load
2916 /// interleave group, return an empty ArrayRef.
2918 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2920 }
2921};
2922
2923 /// VPInterleaveRecipe is a recipe for transforming an interleave group of loads
2924 /// or stores into one wide load/store and shuffles. The first operand of a
2925 /// VPInterleaveRecipe is the address, followed by the stored values, followed
2926 /// by an optional mask.
2928public:
2930 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2931 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2932 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2933 Mask, NeedsMaskForGaps, MD, DL) {}
2934
2935 ~VPInterleaveRecipe() override = default;
2936
2940 needsMaskForGaps(), *this, getDebugLoc());
2941 }
2942
2943 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2944
2945 /// Generate the wide load or store, and shuffles.
2946 void execute(VPTransformState &State) override;
2947
2948 bool usesFirstLaneOnly(const VPValue *Op) const override {
2950 "Op must be an operand of the recipe");
2951 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2952 }
2953
/// Returns the number of stored-value operands: every operand except the
/// address and, when a mask is present, the trailing mask.
2954 unsigned getNumStoreOperands() const override {
2955 return getNumOperands() - (getMask() ? 2 : 1);
2956 }
2957
2958protected:
2959#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2960 /// Print the recipe.
2961 void printRecipe(raw_ostream &O, const Twine &Indent,
2962 VPSlotTracker &SlotTracker) const override;
2963#endif
2964};
2965
2966/// A recipe for interleaved memory operations with vector-predication
2967/// intrinsics. The first operand is the address, the second operand is the
2968/// explicit vector length. Stored values and mask are optional operands.
2970public:
2972 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2973 R.getInterleaveGroup(), {R.getAddr(), &EVL},
2974 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2975 R.getDebugLoc()) {
2976 assert(!getInterleaveGroup()->isReverse() &&
2977 "Reversed interleave-group with tail folding is not supported.");
2978 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2979 "supported for scalable vector.");
2980 }
2981
2982 ~VPInterleaveEVLRecipe() override = default;
2983
2985 llvm_unreachable("cloning not implemented yet");
2986 }
2987
2988 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
2989
2990 /// The VPValue of the explicit vector length.
2991 VPValue *getEVL() const { return getOperand(1); }
2992
2993 /// Generate the wide load or store, and shuffles.
2994 void execute(VPTransformState &State) override;
2995
2996 /// The recipe only uses the first lane of the address, and EVL operand.
2997 bool usesFirstLaneOnly(const VPValue *Op) const override {
2999 "Op must be an operand of the recipe");
3000 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3001 Op == getEVL();
3002 }
3003
/// Returns the number of stored-value operands: every operand except the
/// address, the EVL and, when a mask is present, the trailing mask.
3004 unsigned getNumStoreOperands() const override {
3005 return getNumOperands() - (getMask() ? 3 : 2);
3006 }
3007
3008protected:
3009#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3010 /// Print the recipe.
3011 void printRecipe(raw_ostream &O, const Twine &Indent,
3012 VPSlotTracker &SlotTracker) const override;
3013#endif
3014};
3015
3016/// A recipe to represent inloop, ordered or partial reduction operations. It
3017/// performs a reduction on a vector operand into a scalar (vector in the case
3018/// of a partial reduction) value, and adds the result to a chain. The Operands
3019/// are {ChainOp, VecOp, [Condition]}.
3021
3022 /// The recurrence kind for the reduction in question.
3023 RecurKind RdxKind;
3024 /// Whether the reduction is conditional.
3025 bool IsConditional = false;
3026 ReductionStyle Style;
3027
3028protected:
3029 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3031 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3032 ReductionStyle Style, DebugLoc DL)
3033 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3034 Style(Style) {
3035 if (CondOp) {
3036 IsConditional = true;
3037 addOperand(CondOp);
3038 }
3040 }
3041
3042public:
3044 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3046 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3047 {ChainOp, VecOp}, CondOp, Style, DL) {}
3048
3050 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3052 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3053 {ChainOp, VecOp}, CondOp, Style, DL) {}
3054
3055 ~VPReductionRecipe() override = default;
3056
3058 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3060 getCondOp(), Style, getDebugLoc());
3061 }
3062
/// Returns true if \p R carries the recipe ID VPReductionSC or
/// VPReductionEVLSC.
3063 static inline bool classof(const VPRecipeBase *R) {
3064 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3065 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3066 }
3067
/// Returns true if \p U can be dyn_cast to a VPRecipeBase and that recipe
/// satisfies the VPRecipeBase overload of classof.
3068 static inline bool classof(const VPUser *U) {
3069 auto *R = dyn_cast<VPRecipeBase>(U);
3070 return R && classof(R);
3071 }
3072
/// Returns true if \p VPV has a defining recipe and that recipe satisfies the
/// VPRecipeBase overload of classof. Values for which getDefiningRecipe()
/// returns null never match.
3073 static inline bool classof(const VPValue *VPV) {
3074 const VPRecipeBase *R = VPV->getDefiningRecipe();
3075 return R && classof(R);
3076 }
3077
/// Forwards to the VPRecipeBase overload of classof after converting \p R to
/// its VPRecipeBase base.
3078 static inline bool classof(const VPSingleDefRecipe *R) {
3079 return classof(static_cast<const VPRecipeBase *>(R));
3080 }
3081
3082 /// Generate the reduction in the loop.
3083 void execute(VPTransformState &State) override;
3084
3085 /// Return the cost of VPReductionRecipe.
3086 InstructionCost computeCost(ElementCount VF,
3087 VPCostContext &Ctx) const override;
3088
3089 /// Return the recurrence kind for the in-loop reduction.
3090 RecurKind getRecurrenceKind() const { return RdxKind; }
3091 /// Return true if the in-loop reduction is ordered, i.e. the reduction style
/// currently holds the RdxOrdered alternative.
3092 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
3093 /// Return true if the in-loop reduction is conditional, i.e. a condition
/// operand was supplied when the recipe was constructed.
3094 bool isConditional() const { return IsConditional; }
3095 /// Returns true if the reduction outputs a vector with a scaled down VF.
3096 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3097 /// Returns true if the reduction is in-loop.
3098 bool isInLoop() const {
3099 return std::holds_alternative<RdxInLoop>(Style) ||
3100 std::holds_alternative<RdxOrdered>(Style);
3101 }
3102 /// The VPValue of the scalar Chain being accumulated.
3103 VPValue *getChainOp() const { return getOperand(0); }
3104 /// The VPValue of the vector value to be reduced.
3105 VPValue *getVecOp() const { return getOperand(1); }
3106 /// The VPValue of the condition for the block.
3108 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3109 }
3110 /// Get the factor that the VF of this recipe's output should be scaled by, or
3111 /// 1 if it isn't scaled.
3112 unsigned getVFScaleFactor() const {
// The scale factor is read from the RdxUnordered alternative only; for any
// other alternative held by Style, get_if yields null and 1 is returned.
3113 auto *Partial = std::get_if<RdxUnordered>(&Style);
3114 return Partial ? Partial->VFScaleFactor : 1;
3115 }
3116
3117protected:
3118#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3119 /// Print the recipe.
3120 void printRecipe(raw_ostream &O, const Twine &Indent,
3121 VPSlotTracker &SlotTracker) const override;
3122#endif
3123};
3124
3125/// A recipe to represent inloop reduction operations with vector-predication
3126/// intrinsics, performing a reduction on a vector operand with the explicit
3127/// vector length (EVL) into a scalar value, and adding the result to a chain.
3128/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3130public:
3133 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3134 R.getFastMathFlags(),
3136 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3137 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3138 DL) {}
3139
3140 ~VPReductionEVLRecipe() override = default;
3141
3143 llvm_unreachable("cloning not implemented yet");
3144 }
3145
3146 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3147
3148 /// Generate the reduction in the loop
3149 void execute(VPTransformState &State) override;
3150
3151 /// The VPValue of the explicit vector length.
3152 VPValue *getEVL() const { return getOperand(2); }
3153
3154 /// Returns true if the recipe only uses the first lane of operand \p Op.
3155 bool usesFirstLaneOnly(const VPValue *Op) const override {
3157 "Op must be an operand of the recipe");
3158 return Op == getEVL();
3159 }
3160
3161protected:
3162#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3163 /// Print the recipe.
3164 void printRecipe(raw_ostream &O, const Twine &Indent,
3165 VPSlotTracker &SlotTracker) const override;
3166#endif
3167};
3168
3169/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3170/// copies of the original scalar type, one per lane, instead of producing a
3171/// single copy of widened type for all lanes. If the instruction is known to be
3172/// a single scalar, only one copy, per lane zero, will be generated.
3174 public VPIRMetadata {
3175 /// Indicator if only a single replica per lane is needed.
3176 bool IsSingleScalar;
3177
3178 /// Indicator if the replicas are also predicated.
3179 bool IsPredicated;
3180
3181public:
3183 bool IsSingleScalar, VPValue *Mask = nullptr,
3184 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3185 DebugLoc DL = DebugLoc::getUnknown())
3186 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3187 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3188 IsPredicated(Mask) {
3189 setUnderlyingValue(I);
3190 if (Mask)
3191 addOperand(Mask);
3192 }
3193
3194 ~VPReplicateRecipe() override = default;
3195
3197 auto *Copy = new VPReplicateRecipe(
3198 getUnderlyingInstr(), operands(), IsSingleScalar,
3199 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3200 Copy->transferFlags(*this);
3201 return Copy;
3202 }
3203
3204 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3205
3206 /// Generate replicas of the desired Ingredient. Replicas will be generated
3207 /// for all parts and lanes unless a specific part and lane are specified in
3208 /// the \p State.
3209 void execute(VPTransformState &State) override;
3210
3211 /// Return the cost of this VPReplicateRecipe.
3212 InstructionCost computeCost(ElementCount VF,
3213 VPCostContext &Ctx) const override;
3214
/// Return the IsSingleScalar flag this recipe was constructed with.
3215 bool isSingleScalar() const { return IsSingleScalar; }
3216
/// Return true if the replicas are predicated, i.e. a non-null mask was passed
/// at construction.
3217 bool isPredicated() const { return IsPredicated; }
3218
3219 /// Returns true if the recipe only uses the first lane of operand \p Op.
3220 bool usesFirstLaneOnly(const VPValue *Op) const override {
3222 "Op must be an operand of the recipe");
3223 return isSingleScalar();
3224 }
3225
3226 /// Returns true if the recipe uses scalars of operand \p Op.
3227 bool usesScalars(const VPValue *Op) const override {
3229 "Op must be an operand of the recipe");
3230 return true;
3231 }
3232
3233 /// Returns true if the recipe is used by a widened recipe via an intervening
3234 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3235 /// in a vector.
3236 bool shouldPack() const;
3237
3238 /// Return the mask of a predicated VPReplicateRecipe.
3240 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3241 return getOperand(getNumOperands() - 1);
3242 }
3243
/// Return the opcode of the underlying instruction. The result of
/// getUnderlyingInstr() is dereferenced without a null check.
3244 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3245
3246protected:
3247#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3248 /// Print the recipe.
3249 void printRecipe(raw_ostream &O, const Twine &Indent,
3250 VPSlotTracker &SlotTracker) const override;
3251#endif
3252};
3253
3254/// A recipe for generating conditional branches on the bits of a mask.
3256public:
3258 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3259
3262 }
3263
3264 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3265
3266 /// Generate the extraction of the appropriate bit from the block mask and the
3267 /// conditional branch.
3268 void execute(VPTransformState &State) override;
3269
3270 /// Return the cost of this VPBranchOnMaskRecipe.
3271 InstructionCost computeCost(ElementCount VF,
3272 VPCostContext &Ctx) const override;
3273
3274#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3275 /// Print the recipe.
3276 void printRecipe(raw_ostream &O, const Twine &Indent,
3277 VPSlotTracker &SlotTracker) const override {
3278 O << Indent << "BRANCH-ON-MASK ";
3280 }
3281#endif
3282
3283 /// Returns true if the recipe uses scalars of operand \p Op.
3284 bool usesScalars(const VPValue *Op) const override {
3286 "Op must be an operand of the recipe");
3287 return true;
3288 }
3289};
3290
3291/// A recipe to combine multiple recipes into a single 'expression' recipe,
3292/// which should be considered a single entity for cost-modeling and transforms.
3293/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3294/// expression recipes, before execute. The individual expression recipes are
3295/// completely disconnected from the def-use graph of other recipes not part of
3296/// the expression. Def-use edges between pairs of expression recipes remain
3297/// intact, whereas every edge between an expression recipe and a recipe outside
3298/// the expression is elevated to connect the non-expression recipe with the
3299/// VPExpressionRecipe itself.
3300class VPExpressionRecipe : public VPSingleDefRecipe {
3301 /// Recipes included in this VPExpressionRecipe. This could contain
3302 /// duplicates.
3303 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3304
3305 /// Temporary VPValues used for external operands of the expression, i.e.
3306 /// operands not defined by recipes in the expression.
3307 SmallVector<VPValue *> LiveInPlaceholders;
3308
/// The kinds of expression a VPExpressionRecipe can bundle.
3309 enum class ExpressionTypes {
3310 /// Represents an inloop extended reduction operation, performing a
3311 /// reduction on an extended vector operand into a scalar value, and adding
3312 /// the result to a chain.
3313 ExtendedReduction,
3314 /// Represents an inloop multiply-accumulate reduction, multiplying the
3315 /// extended vector operands, performing a reduction.add on the result, and
3316 /// adding the scalar result to a chain.
3317 ExtMulAccReduction,
3318 /// Represents an inloop multiply-accumulate reduction, multiplying the
3319 /// vector operands, performing a reduction.add on the result, and adding
3320 /// the scalar result to a chain.
3321 MulAccReduction,
3322 /// Represents an inloop multiply-accumulate reduction, multiplying the
3323 /// extended vector operands, negating the multiplication, performing a
3324 /// reduction.add on the result, and adding the scalar result to a chain.
3325 ExtNegatedMulAccReduction,
3326 };
3327
3328 /// Type of the expression.
3329 ExpressionTypes ExpressionType;
3330
3331 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3332 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3333 /// in the expression) are replaced by temporary VPValues and the original
3334 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3335 /// as needed (excluding last) to ensure they are only used by other recipes
3336 /// in the expression.
3337 VPExpressionRecipe(ExpressionTypes ExpressionType,
3338 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3339
3340public:
3342 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3344 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3347 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3348 {Ext0, Ext1, Mul, Red}) {}
3351 VPReductionRecipe *Red)
3352 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3353 {Ext0, Ext1, Mul, Sub, Red}) {
3354 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3355 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3356 "Expected an add reduction");
3357 assert(getNumOperands() >= 3 && "Expected at least three operands");
3358 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3359 assert(SubConst && SubConst->isZero() &&
3360 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3361 }
3362
3364 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3365 for (auto *R : reverse(ExpressionRecipes)) {
3366 if (ExpressionRecipesSeen.insert(R).second)
3367 delete R;
3368 }
3369 for (VPValue *T : LiveInPlaceholders)
3370 delete T;
3371 }
3372
3373 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3374
/// Build a new VPExpressionRecipe of the same ExpressionType from clones of
/// the bundled recipes. Must not be called on an empty expression.
3375 VPExpressionRecipe *clone() override {
3376 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
// Clone every bundled recipe, keeping the clones in the original order.
3377 SmallVector<VPSingleDefRecipe *> ClonedRecipes;
3378 for (auto *Orig : ExpressionRecipes)
3379 ClonedRecipes.push_back(Orig->clone());
3380 for (auto *Cloned : ClonedRecipes) {
// Redirect uses of each original bundled recipe to its clone at the same
// index.
3381 for (const auto &[Idx, Orig] : enumerate(ExpressionRecipes))
3382 Cloned->replaceUsesOfWith(Orig, ClonedRecipes[Idx]);
3383 // Swap the placeholder operands of the clone back to the external
3384 // operands; the constructor below internalizes them again.
3385 for (const auto &[Placeholder, OutsideOp] :
3386 zip(LiveInPlaceholders, operands()))
3387 Cloned->replaceUsesOfWith(Placeholder, OutsideOp);
3388 }
3389 return new VPExpressionRecipe(ExpressionType, ClonedRecipes);
3390 }
3391
3392 /// Return the VPValue to use to infer the result type of the recipe.
3394 unsigned OpIdx =
3395 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3396 : 1;
3397 return getOperand(getNumOperands() - OpIdx);
3398 }
3399
3400 /// Insert the recipes of the expression back into the VPlan, directly before
3401 /// the current recipe. Leaves the expression recipe empty, which must be
3402 /// removed before codegen.
3403 void decompose();
3404
/// Return the VF scale factor of the last bundled recipe if it is a
/// VPReductionRecipe, otherwise 1.
/// NOTE(review): back() is called unconditionally, so this assumes
/// ExpressionRecipes is non-empty -- confirm it is never called after
/// decompose().
3405 unsigned getVFScaleFactor() const {
3406 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3407 return PR ? PR->getVFScaleFactor() : 1;
3408 }
3409
3410 /// Method for generating code, must not be called as this recipe is abstract.
3411 void execute(VPTransformState &State) override {
3412 llvm_unreachable("recipe must be removed before execute");
3413 }
3414
3416 VPCostContext &Ctx) const override;
3417
3418 /// Returns true if this expression contains recipes that may read from or
3419 /// write to memory.
3420 bool mayReadOrWriteMemory() const;
3421
3422 /// Returns true if this expression contains recipes that may have side
3423 /// effects.
3424 bool mayHaveSideEffects() const;
3425
3426 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3427 bool isSingleScalar() const;
3428
3429protected:
3430#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3431 /// Print the recipe.
3432 void printRecipe(raw_ostream &O, const Twine &Indent,
3433 VPSlotTracker &SlotTracker) const override;
3434#endif
3435};
3436
3437/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3438/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3439/// order to merge values that are set under such a branch and feed their uses.
3440/// The phi nodes can be scalar or vector depending on the users of the value.
3441/// This recipe works in concert with VPBranchOnMaskRecipe.
3443public:
3444 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3445 /// nodes after merging back from a Branch-on-Mask.
3447 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
3448 ~VPPredInstPHIRecipe() override = default;
3449
3451 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3452 }
3453
3454 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3455
3456 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3457 /// retain SSA form.
3458 void execute(VPTransformState &State) override;
3459
3460 /// Return the cost of this VPPredInstPHIRecipe.
3462 VPCostContext &Ctx) const override {
3463 // TODO: Compute accurate cost after retiring the legacy cost model.
3464 return 0;
3465 }
3466
3467 /// Returns true if the recipe uses scalars of operand \p Op.
3468 bool usesScalars(const VPValue *Op) const override {
3470 "Op must be an operand of the recipe");
3471 return true;
3472 }
3473
3474protected:
3475#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3476 /// Print the recipe.
3477 void printRecipe(raw_ostream &O, const Twine &Indent,
3478 VPSlotTracker &SlotTracker) const override;
3479#endif
3480};
3481
3482/// A common base class for widening memory operations. An optional mask can be
3483/// provided as the last operand.
3485 public VPIRMetadata {
3486protected:
3488
3489 /// Alignment information for this memory access.
3491
3492 /// Whether the accessed addresses are consecutive.
3494
3495 /// Whether the consecutive accessed addresses are in reverse order.
3497
3498 /// Whether the memory access is masked.
3499 bool IsMasked = false;
3500
/// Append \p Mask as an additional operand and record that the access is
/// masked. A null \p Mask leaves the recipe unchanged. Asserts that no mask
/// has been set before.
3501 void setMask(VPValue *Mask) {
3502 assert(!IsMasked && "cannot re-set mask");
3503 if (!Mask)
3504 return;
// The mask is appended, so it ends up as the last operand.
3505 addOperand(Mask);
3506 IsMasked = true;
3507 }
3508
3509 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3510 std::initializer_list<VPValue *> Operands,
3511 bool Consecutive, bool Reverse,
3512 const VPIRMetadata &Metadata, DebugLoc DL)
3513 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3515 Reverse(Reverse) {
3516 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3518 "Reversed acccess without VPVectorEndPointerRecipe address?");
3519 }
3520
3521public:
3523 llvm_unreachable("cloning not supported");
3524 }
3525
/// Returns true if \p R carries one of the four widened memory recipe IDs:
/// VPWidenLoadSC, VPWidenStoreSC, VPWidenLoadEVLSC or VPWidenStoreEVLSC.
3526 static inline bool classof(const VPRecipeBase *R) {
3527 return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3528 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3529 R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3530 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3531 }
3532
/// Returns true if \p U can be dyn_cast to a VPRecipeBase and that recipe
/// satisfies the VPRecipeBase overload of classof.
3533 static inline bool classof(const VPUser *U) {
3534 auto *R = dyn_cast<VPRecipeBase>(U);
3535 return R && classof(R);
3536 }
3537
3538 /// Return whether the loaded-from / stored-to addresses are consecutive.
3539 bool isConsecutive() const { return Consecutive; }
3540
3541 /// Return whether the consecutive loaded/stored addresses are in reverse
3542 /// order.
3543 bool isReverse() const { return Reverse; }
3544
3545 /// Return the address accessed by this recipe.
3546 VPValue *getAddr() const { return getOperand(0); }
3547
3548 /// Returns true if the recipe is masked.
3549 bool isMasked() const { return IsMasked; }
3550
3551 /// Return the mask used by this recipe. Note that a full mask is represented
3552 /// by a nullptr.
3553 VPValue *getMask() const {
3554 // Mask is optional and therefore the last operand.
3555 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3556 }
3557
3558 /// Returns the alignment of the memory access.
3559 Align getAlign() const { return Alignment; }
3560
3561 /// Generate the wide load/store.
3562 void execute(VPTransformState &State) override {
3563 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3564 }
3565
3566 /// Return the cost of this VPWidenMemoryRecipe.
3567 InstructionCost computeCost(ElementCount VF,
3568 VPCostContext &Ctx) const override;
3569
3571};
3572
3573/// A recipe for widening load operations, using the address to load from and an
3574/// optional mask.
3576 public VPRecipeValue {
3578 bool Consecutive, bool Reverse,
3579 const VPIRMetadata &Metadata, DebugLoc DL)
3580 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3581 Consecutive, Reverse, Metadata, DL),
3582 VPRecipeValue(this, &Load) {
3583 setMask(Mask);
3584 }
3585
3588 getMask(), Consecutive, Reverse, *this,
3589 getDebugLoc());
3590 }
3591
3592 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3593
3594 /// Generate a wide load or gather.
3595 void execute(VPTransformState &State) override;
3596
3597 /// Returns true if the recipe only uses the first lane of operand \p Op.
3598 bool usesFirstLaneOnly(const VPValue *Op) const override {
3600 "Op must be an operand of the recipe");
3601 // Widened, consecutive loads operations only demand the first lane of
3602 // their address.
3603 return Op == getAddr() && isConsecutive();
3604 }
3605
3606protected:
3607#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3608 /// Print the recipe.
3609 void printRecipe(raw_ostream &O, const Twine &Indent,
3610 VPSlotTracker &SlotTracker) const override;
3611#endif
3612};
3613
3614/// A recipe for widening load operations with vector-predication intrinsics,
3615/// using the address to load from, the explicit vector length and an optional
3616/// mask.
3618 public VPRecipeValue {
3620 VPValue *Mask)
3621 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
3622 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3623 L.getDebugLoc()),
3624 VPRecipeValue(this, &getIngredient()) {
3625 setMask(Mask);
3626 }
3627
3628 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3629
3630 /// Return the EVL operand.
3631 VPValue *getEVL() const { return getOperand(1); }
3632
3633 /// Generate the wide load or gather.
3634 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3635
3636 /// Return the cost of this VPWidenLoadEVLRecipe.
3638 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3639
3640 /// Returns true if the recipe only uses the first lane of operand \p Op.
3641 bool usesFirstLaneOnly(const VPValue *Op) const override {
3643 "Op must be an operand of the recipe");
3644 // Widened loads only demand the first lane of EVL and consecutive loads
3645 // only demand the first lane of their address.
3646 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3647 }
3648
3649protected:
3650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3651 /// Print the recipe.
3652 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3653 VPSlotTracker &SlotTracker) const override;
3654#endif
3655};
3656
3657/// A recipe for widening store operations, using the stored value, the address
3658/// to store to and an optional mask.
/// Create a widened store recipe for \p Store. The operands are
/// {\p Addr, \p StoredVal}; a non-null \p Mask is appended as a further
/// operand via setMask. \p Consecutive, \p Reverse, \p Metadata and \p DL are
/// forwarded to VPWidenMemoryRecipe.
3660 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3661 VPValue *Mask, bool Consecutive, bool Reverse,
3662 const VPIRMetadata &Metadata, DebugLoc DL)
3663 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3664 {Addr, StoredVal}, Consecutive, Reverse, Metadata,
3665 DL) {
3666 setMask(Mask);
3667 }
3668
3674
3675 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3676
3677 /// Return the value stored by this recipe.
3678 VPValue *getStoredValue() const { return getOperand(1); }
3679
3680 /// Generate a wide store or scatter.
3681 void execute(VPTransformState &State) override;
3682
3683 /// Returns true if the recipe only uses the first lane of operand \p Op.
3684 bool usesFirstLaneOnly(const VPValue *Op) const override {
3686 "Op must be an operand of the recipe");
3687 // Widened, consecutive stores only demand the first lane of their address,
3688 // unless the same operand is also stored.
3689 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3690 }
3691
3692protected:
3693#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3694 /// Print the recipe.
3695 void printRecipe(raw_ostream &O, const Twine &Indent,
3696 VPSlotTracker &SlotTracker) const override;
3697#endif
3698};
3699
3700/// A recipe for widening store operations with vector-predication intrinsics,
3701/// using the value to store, the address to store to, the explicit vector
3702/// length and an optional mask.
3705 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3706 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
3707 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3708 S.isReverse(), S, S.getDebugLoc()) {
3709 setMask(Mask);
3710 }
3711
3712 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3713
3714 /// Return the value stored by this recipe.
3715 VPValue *getStoredValue() const { return getOperand(1); }
3716
3717 /// Return the EVL operand.
3718 VPValue *getEVL() const { return getOperand(2); }
3719
3720 /// Generate the wide store or scatter.
3721 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3722
3723 /// Return the cost of this VPWidenStoreEVLRecipe.
3725 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3726
3727 /// Returns true if the recipe only uses the first lane of operand \p Op.
3728 bool usesFirstLaneOnly(const VPValue *Op) const override {
3730 "Op must be an operand of the recipe");
3731 if (Op == getEVL()) {
3732 assert(getStoredValue() != Op && "unexpected store of EVL");
3733 return true;
3734 }
3735 // Widened, consecutive memory operations only demand the first lane of
3736 // their address, unless the same operand is also stored. The latter can
3737 // happen with opaque pointers.
3738 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3739 }
3740
3741protected:
3742#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3743 /// Print the recipe.
3744 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3745 VPSlotTracker &SlotTracker) const override;
3746#endif
3747};
3748
3749/// Recipe to expand a SCEV expression.
3751 const SCEV *Expr;
3752
3753public:
3755 : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}
3756
3757 ~VPExpandSCEVRecipe() override = default;
3758
/// Return a newly allocated VPExpandSCEVRecipe for the same SCEV expression.
3759 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3760
3761 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3762
/// Must not be reached: SCEV expressions are expected to have been expanded
/// before final execute, so this unconditionally hits llvm_unreachable.
3763 void execute(VPTransformState &State) override {
3764 llvm_unreachable("SCEV expressions must be expanded before final execute");
3765 }
3766
3767 /// Return the cost of this VPExpandSCEVRecipe.
3769 VPCostContext &Ctx) const override {
3770 // TODO: Compute accurate cost after retiring the legacy cost model.
3771 return 0;
3772 }
3773
/// Return the SCEV expression this recipe was created for.
3774 const SCEV *getSCEV() const { return Expr; }
3775
3776protected:
3777#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3778 /// Print the recipe.
3779 void printRecipe(raw_ostream &O, const Twine &Indent,
3780 VPSlotTracker &SlotTracker) const override;
3781#endif
3782};
3783
3784 /// Canonical scalar induction phi of the vector loop, starting at the specified
3785 /// start value (either 0 or the resume value when vectorizing the epilogue
3786 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3787 /// canonical induction variable.
3789public:
3791 : VPHeaderPHIRecipe(VPRecipeBase::VPCanonicalIVPHISC, nullptr, StartV,
3792 DL) {}
3793
3794 ~VPCanonicalIVPHIRecipe() override = default;
3795
3798 R->addOperand(getBackedgeValue());
3799 return R;
3800 }
3801
3802 VP_CLASSOF_IMPL(VPRecipeBase::VPCanonicalIVPHISC)
3803
/// Must not be reached: this recipe is expected to have been replaced by a
/// scalar phi recipe beforehand, so this unconditionally hits
/// llvm_unreachable.
3804 void execute(VPTransformState &State) override {
3805 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3806 "scalar phi recipe");
3807 }
3808
3809 /// Returns the start value of the canonical induction.
3811
3812 /// Returns the scalar type of the induction.
3813 Type *getScalarType() const { return getStartValue()->getType(); }
3814
3815 /// Returns true if the recipe only uses the first lane of operand \p Op.
3816 bool usesFirstLaneOnly(const VPValue *Op) const override {
3818 "Op must be an operand of the recipe");
3819 return true;
3820 }
3821
3822 /// Returns true if the recipe only uses the first part of operand \p Op.
3823 bool usesFirstPartOnly(const VPValue *Op) const override {
3825 "Op must be an operand of the recipe");
3826 return true;
3827 }
3828
3829 /// Return the cost of this VPCanonicalIVPHIRecipe.
3831 VPCostContext &Ctx) const override {
3832 // For now, match the behavior of the legacy cost model.
3833 return 0;
3834 }
3835
3836protected:
3837#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3838 /// Print the recipe.
3839 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3840 VPSlotTracker &SlotTracker) const override;
3841#endif
3842};
3843
3844/// A recipe for generating the active lane mask for the vector loop that is
3845/// used to predicate the vector operations.
3847public:
3849 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3850 StartMask, DL) {}
3851
3852 ~VPActiveLaneMaskPHIRecipe() override = default;
3853
3856 if (getNumOperands() == 2)
3857 R->addOperand(getOperand(1));
3858 return R;
3859 }
3860
3861 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3862
3863 /// Generate the active lane mask phi of the vector loop.
3864 void execute(VPTransformState &State) override;
3865
3866protected:
3867#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3868 /// Print the recipe.
3869 void printRecipe(raw_ostream &O, const Twine &Indent,
3870 VPSlotTracker &SlotTracker) const override;
3871#endif
3872};
3873
3874/// A recipe for generating the phi node tracking the current scalar iteration
3875/// index. It starts at the start value of the canonical induction and gets
3876/// incremented by the number of scalar iterations processed by the vector loop
3877/// iteration. The increment does not have to be loop invariant.
3879public:
3881 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3882 StartIV, DL) {}
3883
3884 ~VPCurrentIterationPHIRecipe() override = default;
3885
3887 llvm_unreachable("cloning not implemented yet");
3888 }
3889
3890 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3891
/// Must never be reached: this recipe is expected to have been replaced by a
/// scalar phi recipe before execution, so calling it aborts via
/// llvm_unreachable. \p State is unused.
3892 void execute(VPTransformState &State) override {
3893 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3894 "scalar phi recipe");
3895 }
3896
3897 /// Return the cost of this VPCurrentIterationPHIRecipe.
3899 VPCostContext &Ctx) const override {
3900 // For now, match the behavior of the legacy cost model.
3901 return 0;
3902 }
3903
3904 /// Returns true if the recipe only uses the first lane of operand \p Op.
3905 bool usesFirstLaneOnly(const VPValue *Op) const override {
3907 "Op must be an operand of the recipe");
3908 return true;
3909 }
3910
3911protected:
3912#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3913 /// Print the recipe.
3914 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3915 VPSlotTracker &SlotTracker) const override;
3916#endif
3917};
3918
3919/// A Recipe for widening the canonical induction variable of the vector loop.
3921 public VPUnrollPartAccessor<1> {
3922public:
3924 : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3925
3926 ~VPWidenCanonicalIVRecipe() override = default;
3927
3932
3933 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
3934
3935 /// Generate a canonical vector induction variable of the vector loop, with
3936 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3937 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3938 void execute(VPTransformState &State) override;
3939
3940 /// Return the cost of this VPWidenCanonicalIVRecipe.
3942 VPCostContext &Ctx) const override {
3943 // TODO: Compute accurate cost after retiring the legacy cost model.
3944 return 0;
3945 }
3946
3947protected:
3948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3949 /// Print the recipe.
3950 void printRecipe(raw_ostream &O, const Twine &Indent,
3951 VPSlotTracker &SlotTracker) const override;
3952#endif
3953};
3954
3955/// A recipe for converting the input value \p IV to the corresponding
3956/// value of an IV with different start and step values, using Start + IV *
3957/// Step.
3959 /// Kind of the induction.
3961 /// If not nullptr, the floating point induction binary operator. Must be set
3962 /// for floating point inductions.
3963 const FPMathOperator *FPBinOp;
3964
3965 /// Name to use for the generated IR instruction for the derived IV.
3966 std::string Name;
3967
3968public:
3970 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3971 const Twine &Name = "")
3973 IndDesc.getKind(),
3974 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3975 Start, CanonicalIV, Step, Name) {}
3976
3978 const FPMathOperator *FPBinOp, VPIRValue *Start,
3979 VPValue *IV, VPValue *Step, const Twine &Name = "")
3980 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
3981 Kind(Kind), FPBinOp(FPBinOp), Name(Name.str()) {}
3982
3983 ~VPDerivedIVRecipe() override = default;
3984
3986 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3987 getStepValue());
3988 }
3989
3990 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
3991
3992 /// Generate the transformed value of the induction at offset StartValue (1.
3993 /// operand) + IV (2. operand) * StepValue (3. operand).
3994 void execute(VPTransformState &State) override;
3995
3996 /// Return the cost of this VPDerivedIVRecipe.
3998 VPCostContext &Ctx) const override {
3999 // TODO: Compute accurate cost after retiring the legacy cost model.
4000 return 0;
4001 }
4002
4003 Type *getScalarType() const { return getStartValue()->getType(); }
4004
4006 VPValue *getStepValue() const { return getOperand(2); }
4007
4008 /// Returns true if the recipe only uses the first lane of operand \p Op.
4009 bool usesFirstLaneOnly(const VPValue *Op) const override {
4011 "Op must be an operand of the recipe");
4012 return true;
4013 }
4014
4015protected:
4016#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4017 /// Print the recipe.
4018 void printRecipe(raw_ostream &O, const Twine &Indent,
4019 VPSlotTracker &SlotTracker) const override;
4020#endif
4021};
4022
4023/// A recipe for handling phi nodes of integer and floating-point inductions,
4024/// producing their scalar values. Before unrolling by UF the recipe represents
4025/// the VF*UF scalar values to be produced, or UF scalar values if only first
4026/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4027/// operand StartIndex to all unroll parts except part 0, as the recipe
4028/// represents the VF scalar values (this number of values is taken from
4029/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4031 Instruction::BinaryOps InductionOpcode;
4032
4033public:
4036 DebugLoc DL)
4037 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4038 FMFs, DL),
4039 InductionOpcode(Opcode) {}
4040
4042 VPValue *Step, VPValue *VF,
4045 IV, Step, VF, IndDesc.getInductionOpcode(),
4046 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4047 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4048 : FastMathFlags(),
4049 DL) {}
4050
4051 ~VPScalarIVStepsRecipe() override = default;
4052
4055 getOperand(2), InductionOpcode,
4057 }
4058
4059 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4060
4061 /// Generate the scalarized versions of the phi node as needed by their users.
4062 void execute(VPTransformState &State) override;
4063
4064 /// Return the cost of this VPScalarIVStepsRecipe.
4066 VPCostContext &Ctx) const override {
4067 // TODO: Compute accurate cost after retiring the legacy cost model.
4068 return 0;
4069 }
4070
4071 VPValue *getStepValue() const { return getOperand(1); }
4072
4073 /// Return the number of scalars to produce per unroll part, used to compute
4074 /// StartIndex during unrolling.
4075 VPValue *getVFValue() const { return getOperand(2); }
4076
4077 /// Return the StartIndex, or null if known to be zero, valid only after
4078 /// unrolling.
4080 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4081 }
4082
4083 /// Returns true if the recipe only uses the first lane of operand \p Op.
4084 bool usesFirstLaneOnly(const VPValue *Op) const override {
4086 "Op must be an operand of the recipe");
4087 return true;
4088 }
4089
4090protected:
4091#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4092 /// Print the recipe.
4093 void printRecipe(raw_ostream &O, const Twine &Indent,
4094 VPSlotTracker &SlotTracker) const override;
4095#endif
4096};
4097
4098/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
4099/// types implementing VPPhiAccessors. Used by isa<> & co.
4101 static inline bool isPossible(const VPRecipeBase *f) {
4102 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
4104 }
4105};
4106/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
4107/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
4108template <typename SrcTy>
4109struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
4110
4112
4113 /// doCast is used by cast<>.
/// Dispatches on the recipe ID of \p R to down-cast it to the concrete
/// phi-like recipe class, whose pointer then converts to VPPhiAccessors.
/// The lambda yields a pointer-to-const; the surrounding const_cast turns it
/// into the non-const pointer type returned here.
4114 static inline VPPhiAccessors *doCast(SrcTy R) {
4115 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
4116 switch (R->getVPRecipeID()) {
4117 case VPRecipeBase::VPInstructionSC:
4118 return cast<VPPhi>(R);
4119 case VPRecipeBase::VPIRInstructionSC:
4120 return cast<VPIRPhi>(R);
4121 case VPRecipeBase::VPWidenPHISC:
4122 return cast<VPWidenPHIRecipe>(R);
4123 default:
// Every other recipe ID is treated as a header phi recipe; cast<> rejects
// anything that is not one.
4124 return cast<VPHeaderPHIRecipe>(R);
4125 }
4126 }());
4127 }
4128
4129 /// doCastIfPossible is used by dyn_cast<>.
4130 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
4131 if (!Self::isPossible(f))
4132 return nullptr;
4133 return doCast(f);
4134 }
4135};
4136template <>
4139template <>
4142
4143/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
4144/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
4145namespace detail {
/// Down-cast \p R to the concrete recipe class selected by its recipe ID and
/// return the result converted to \p DstTy. Each recipe ID (or group of IDs
/// sharing a common class) must return a cast to its own class; a recipe ID
/// that is not listed is a programming error and hits llvm_unreachable.
4146template <typename DstTy, typename RecipeBasePtrTy>
4147static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
4148 switch (R->getVPRecipeID()) {
4149 case VPRecipeBase::VPInstructionSC:
4150 return cast<VPInstruction>(R);
4151 case VPRecipeBase::VPWidenSC:
4152 return cast<VPWidenRecipe>(R);
4153 case VPRecipeBase::VPWidenCastSC:
4154 return cast<VPWidenCastRecipe>(R);
4155 case VPRecipeBase::VPWidenIntrinsicSC:
// Widened intrinsics have their own recipe class; they must not fall through
// to the VPWidenCallRecipe cast below.
4156 return cast<VPWidenIntrinsicRecipe>(R);
4157 case VPRecipeBase::VPWidenCallSC:
4158 return cast<VPWidenCallRecipe>(R);
4159 case VPRecipeBase::VPReplicateSC:
4160 return cast<VPReplicateRecipe>(R);
4161 case VPRecipeBase::VPInterleaveSC:
4162 case VPRecipeBase::VPInterleaveEVLSC:
// Both interleave recipe IDs share the VPInterleaveBase class.
4163 return cast<VPInterleaveBase>(R);
4164 case VPRecipeBase::VPWidenLoadSC:
4165 case VPRecipeBase::VPWidenLoadEVLSC:
4166 case VPRecipeBase::VPWidenStoreSC:
4167 case VPRecipeBase::VPWidenStoreEVLSC:
// All widened load/store recipe IDs share the VPWidenMemoryRecipe class.
4168 return cast<VPWidenMemoryRecipe>(R);
4169 default:
4170 llvm_unreachable("invalid recipe for VPIRMetadata cast");
4171 }
4172}
4173} // namespace detail
4174
4175/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
4176/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
4177template <typename DstTy, typename SrcTy>
4178struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
4179 static inline bool isPossible(SrcTy R) {
4180 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
4181 // also handled in castToVPIRMetadata.
4186 R);
4187 }
4188
4189 using RetTy = DstTy *;
4190
4191 /// doCast is used by cast<>.
4192 static inline RetTy doCast(SrcTy R) {
4194 }
4195
4196 /// doCastIfPossible is used by dyn_cast<>.
4197 static inline RetTy doCastIfPossible(SrcTy R) {
4198 if (!isPossible(R))
4199 return nullptr;
4200 return doCast(R);
4201 }
4202};
4203template <>
4206template <>
4209
4210/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4211/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4212/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4213class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4214 friend class VPlan;
4215
4216 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4217 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4218 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4219 if (Recipe)
4220 appendRecipe(Recipe);
4221 }
4222
4223public:
4225
4226protected:
4227 /// The VPRecipes held in the order of output instructions to generate.
4229
4230 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4231 : VPBlockBase(BlockSC, Name.str()) {}
4232
4233public:
/// Empties the recipe list by repeatedly removing its last recipe.
/// NOTE(review): presumably back-to-front so that users go away before the
/// recipes defining their operands — confirm.
4234 ~VPBasicBlock() override {
4235 while (!Recipes.empty())
4236 Recipes.pop_back();
4237 }
4238
4239 /// Instruction iterators...
4244
4245 //===--------------------------------------------------------------------===//
4246 /// Recipe iterator methods
4247 ///
4248 inline iterator begin() { return Recipes.begin(); }
4249 inline const_iterator begin() const { return Recipes.begin(); }
4250 inline iterator end() { return Recipes.end(); }
4251 inline const_iterator end() const { return Recipes.end(); }
4252
4253 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4254 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4255 inline reverse_iterator rend() { return Recipes.rend(); }
4256 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4257
4258 inline size_t size() const { return Recipes.size(); }
4259 inline bool empty() const { return Recipes.empty(); }
4260 inline const VPRecipeBase &front() const { return Recipes.front(); }
4261 inline VPRecipeBase &front() { return Recipes.front(); }
4262 inline const VPRecipeBase &back() const { return Recipes.back(); }
4263 inline VPRecipeBase &back() { return Recipes.back(); }
4264
4265 /// Returns a reference to the list of recipes.
4267
4268 /// Returns a pointer to a member of the recipe list.
4269 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4270 return &VPBasicBlock::Recipes;
4271 }
4272
4273 /// Method to support type inquiry through isa, cast, and dyn_cast.
4274 static inline bool classof(const VPBlockBase *V) {
4275 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4276 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4277 }
4278
/// Insert \p Recipe into this block's recipe list at position \p InsertPt
/// and record this block as its parent. \p Recipe must be non-null and must
/// not already have a parent block.
4279 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4280 assert(Recipe && "No recipe to append.");
4281 assert(!Recipe->Parent && "Recipe already in VPlan");
4282 Recipe->Parent = this;
4283 Recipes.insert(InsertPt, Recipe);
4284 }
4285
4286 /// Augment the existing recipes of a VPBasicBlock with an additional
4287 /// \p Recipe as the last recipe.
4288 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4289
4290 /// The method which generates the output IR instructions that correspond to
4291 /// this VPBasicBlock, thereby "executing" the VPlan.
4292 void execute(VPTransformState *State) override;
4293
4294 /// Return the cost of this VPBasicBlock.
4295 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4296
4297 /// Return the position of the first non-phi node recipe in the block.
4298 iterator getFirstNonPhi();
4299
4300 /// Returns an iterator range over the PHI-like recipes in the block.
4304
4305 /// Split current block at \p SplitAt by inserting a new block between the
4306 /// current block and its successors and moving all recipes starting at
4307 /// SplitAt to the new block. Returns the new block.
4308 VPBasicBlock *splitAt(iterator SplitAt);
4309
4310 VPRegionBlock *getEnclosingLoopRegion();
4311 const VPRegionBlock *getEnclosingLoopRegion() const;
4312
4313#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4314 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4315 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4316 ///
4317 /// Note that the numbering is applied to the whole VPlan, so printing
4318 /// individual blocks is consistent with the whole VPlan printing.
4319 void print(raw_ostream &O, const Twine &Indent,
4320 VPSlotTracker &SlotTracker) const override;
4321 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4322#endif
4323
4324 /// If the block has multiple successors, return the branch recipe terminating
4325 /// the block. If it has no successors or only a single one, return nullptr.
4326 VPRecipeBase *getTerminator();
4327 const VPRecipeBase *getTerminator() const;
4328
4329 /// Returns true if the block is exiting its parent region.
4330 bool isExiting() const;
4331
4332 /// Clone the current block and its recipes, without updating the operands of
4333 /// the cloned recipes.
4334 VPBasicBlock *clone() override;
4335
4336 /// Returns the predecessor block at index \p Idx with the predecessors as per
4337 /// the corresponding plain CFG. If the block is an entry block to a region,
4338 /// the first predecessor is the single predecessor of a region, and the
4339 /// second predecessor is the exiting block of the region.
4340 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4341
4342protected:
4343 /// Execute the recipes in the IR basic block \p BB.
4344 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4345
4346 /// Connect the predecessors of this VPBB in the VPlan CFG to the IR basic block
4347 /// generated for this VPBB.
4348 void connectToPredecessors(VPTransformState &State);
4349
4350private:
4351 /// Create an IR BasicBlock to hold the output instructions generated by this
4352 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4353 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4354};
4355
4356inline const VPBasicBlock *
4358 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4359}
4360
4361/// A special type of VPBasicBlock that wraps an existing IR basic block.
4362/// Recipes of the block get added before the first non-phi instruction in the
4363/// wrapped block.
4364/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4365/// preheader block.
4366class VPIRBasicBlock : public VPBasicBlock {
4367 friend class VPlan;
4368
/// The IR basic block wrapped by this VPIRBasicBlock.
4369 BasicBlock *IRBB;
4370
4371 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
/// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4372 VPIRBasicBlock(BasicBlock *IRBB)
4373 : VPBasicBlock(VPIRBasicBlockSC,
4374 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4375 IRBB(IRBB) {}
4376
4377public:
4378 ~VPIRBasicBlock() override = default;
4379
/// Method to support type inquiry through isa, cast, and dyn_cast. Only
/// blocks whose ID is VPIRBasicBlockSC are VPIRBasicBlocks.
4380 static inline bool classof(const VPBlockBase *V) {
4381 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4382 }
4383
4384 /// The method which generates the output IR instructions that correspond to
4385 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4386 void execute(VPTransformState *State) override;
4387
/// Clone this block; covariant override of VPBasicBlock::clone().
4388 VPIRBasicBlock *clone() override;
4389
/// Returns the wrapped IR basic block.
4390 BasicBlock *getIRBasicBlock() const { return IRBB; }
4391};
4392
4393/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4394/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4395/// A VPRegionBlock may indicate that its contents are to be replicated several
4396/// times. This is designed to support predicated scalarization, in which a
4397/// scalar if-then code structure needs to be generated VF * UF times. Having
4398/// this replication indicator helps to keep a single model for multiple
4399/// candidate VF's. The actual replication takes place only once the desired VF
4400/// and UF have been determined.
4401class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4402 friend class VPlan;
4403
4404 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4405 VPBlockBase *Entry;
4406
4407 /// Hold the Single Exiting block of the SESE region modelled by the
4408 /// VPRegionBlock.
4409 VPBlockBase *Exiting;
4410
4411 /// An indicator whether this region is to generate multiple replicated
4412 /// instances of output IR corresponding to its VPBlockBases.
4413 bool IsReplicator;
4414
4415 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4416 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4417 const std::string &Name = "", bool IsReplicator = false)
4418 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4419 IsReplicator(IsReplicator) {
4420 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4421 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4422 Entry->setParent(this);
4423 Exiting->setParent(this);
4424 }
4425 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4426 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4427 IsReplicator(IsReplicator) {}
4428
4429public:
4430 ~VPRegionBlock() override = default;
4431
4432 /// Method to support type inquiry through isa, cast, and dyn_cast.
4433 static inline bool classof(const VPBlockBase *V) {
4434 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4435 }
4436
4437 const VPBlockBase *getEntry() const { return Entry; }
4438 VPBlockBase *getEntry() { return Entry; }
4439
4440 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4441 /// EntryBlock must have no predecessors.
4442 void setEntry(VPBlockBase *EntryBlock) {
4443 assert(EntryBlock->getPredecessors().empty() &&
4444 "Entry block cannot have predecessors.");
4445 Entry = EntryBlock;
4446 EntryBlock->setParent(this);
4447 }
4448
4449 const VPBlockBase *getExiting() const { return Exiting; }
4450 VPBlockBase *getExiting() { return Exiting; }
4451
4452 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4453 /// ExitingBlock must have no successors.
4454 void setExiting(VPBlockBase *ExitingBlock) {
4455 assert(ExitingBlock->getSuccessors().empty() &&
4456 "Exit block cannot have successors.");
4457 Exiting = ExitingBlock;
4458 ExitingBlock->setParent(this);
4459 }
4460
4461 /// Returns the pre-header VPBasicBlock of the loop region.
4463 assert(!isReplicator() && "should only get pre-header of loop regions");
4464 return getSinglePredecessor()->getExitingBasicBlock();
4465 }
4466
4467 /// An indicator whether this region is to generate multiple replicated
4468 /// instances of output IR corresponding to its VPBlockBases.
4469 bool isReplicator() const { return IsReplicator; }
4470
4471 /// The method which generates the output IR instructions that correspond to
4472 /// this VPRegionBlock, thereby "executing" the VPlan.
4473 void execute(VPTransformState *State) override;
4474
4475 /// Return the cost of this region.
4476 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4477
4478#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4479 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4480 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4481 /// consecutive numbers.
4482 ///
4483 /// Note that the numbering is applied to the whole VPlan, so printing
4484 /// individual regions is consistent with the whole VPlan printing.
4485 void print(raw_ostream &O, const Twine &Indent,
4486 VPSlotTracker &SlotTracker) const override;
4487 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4488#endif
4489
4490 /// Clone all blocks in the single-entry single-exit region of the block and
4491 /// their recipes without updating the operands of the cloned recipes.
4492 VPRegionBlock *clone() override;
4493
4494 /// Remove the current region from its VPlan, connecting its predecessor to
4495 /// its entry, and its exiting block to its successor.
4496 void dissolveToCFGLoop();
4497
4498 /// Returns the canonical induction recipe of the region.
4500 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4501 if (EntryVPBB->empty()) {
4502 // VPlan native path. TODO: Unify both code paths.
4503 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4504 }
4505 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4506 }
4508 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4509 }
4510
4511 /// Return the type of the canonical IV for loop regions.
4512 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4513 const Type *getCanonicalIVType() const {
4514 return getCanonicalIV()->getScalarType();
4515 }
4516};
4517
4519 return getParent()->getParent();
4520}
4521
4523 return getParent()->getParent();
4524}
4525
4526/// VPlan models a candidate for vectorization, encoding various decisions
4527/// taken to produce efficient output IR, including which branches,
4528/// basic-blocks and output IR instructions to generate, and their cost. VPlan
4529/// holds a Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an
4530/// Entry VPBasicBlock.
4531class VPlan {
4532 friend class VPlanPrinter;
4533 friend class VPSlotTracker;
4534
4535 /// VPBasicBlock corresponding to the original preheader. Used to place
4536 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4537 /// rest of VPlan execution.
4538 /// When this VPlan is used for the epilogue vector loop, the entry will be
4539 /// replaced by a new entry block created during skeleton creation.
4540 VPBasicBlock *Entry;
4541
4542 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4543 VPIRBasicBlock *ScalarHeader;
4544
4545 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4546 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4547 /// e.g. if the scalar epilogue always executes.
4549
4550 /// Holds the VFs applicable to this VPlan.
4552
4553 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4554 /// any UF.
4556
4557 /// Holds the name of the VPlan, for printing.
4558 std::string Name;
4559
4560 /// Represents the trip count of the original loop, for folding
4561 /// the tail.
4562 VPValue *TripCount = nullptr;
4563
4564 /// Represents the backedge taken count of the original loop, for folding
4565 /// the tail. It equals TripCount - 1.
4566 VPSymbolicValue *BackedgeTakenCount = nullptr;
4567
4568 /// Represents the vector trip count.
4569 VPSymbolicValue VectorTripCount;
4570
4571 /// Represents the vectorization factor of the loop.
4572 VPSymbolicValue VF;
4573
4574 /// Represents the unroll factor of the loop.
4575 VPSymbolicValue UF;
4576
4577 /// Represents the loop-invariant VF * UF of the vector loop region.
4578 VPSymbolicValue VFxUF;
4579
4580 /// Contains all the external definitions created for this VPlan, as a mapping
4581 /// from IR Values to VPIRValues.
4583
4584 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4585 /// VPlan is destroyed.
4586 SmallVector<VPBlockBase *> CreatedBlocks;
4587
4588 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4589 /// wrapping the original header of the scalar loop.
4590 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4591 : Entry(Entry), ScalarHeader(ScalarHeader) {
4592 Entry->setPlan(this);
4593 assert(ScalarHeader->getNumSuccessors() == 0 &&
4594 "scalar header must be a leaf node");
4595 }
4596
4597public:
4598 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4599 /// original preheader and scalar header of \p L, to be used as entry and
4600 /// scalar header blocks of the new VPlan.
4601 VPlan(Loop *L);
4602
4603 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4604 /// wrapping \p ScalarHeaderBB as scalar header.
4605 VPlan(BasicBlock *ScalarHeaderBB) {
4606 setEntry(createVPBasicBlock("preheader"));
4607 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4608 }
4609
4611
4613 Entry = VPBB;
4614 VPBB->setPlan(this);
4615 }
4616
4617 /// Generate the IR code for this VPlan.
4618 void execute(VPTransformState *State);
4619
4620 /// Return the cost of this plan.
4622
4623 VPBasicBlock *getEntry() { return Entry; }
4624 const VPBasicBlock *getEntry() const { return Entry; }
4625
4626 /// Returns the preheader of the vector loop region, if one exists, or null
4627 /// otherwise.
4629 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4630 return VectorRegion
4631 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4632 : nullptr;
4633 }
4634
4635 /// Returns the VPRegionBlock of the vector loop.
4638
4639 /// Returns the 'middle' block of the plan, that is the block that selects
4640 /// whether to execute the scalar tail loop or the exit block from the loop
4641 /// latch. If there is an early exit from the vector loop, the middle block
4642 /// conceptually has the early exit block as third successor, split across 2
4643 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4644 /// tail loop or the exit block. If the scalar tail loop or exit block are
4645 /// known to always execute, the middle block may branch directly to that
4646 /// block. This function cannot be called once the vector loop region has been
4647 /// removed.
4649 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4650 assert(
4651 LoopRegion &&
4652 "cannot call the function after vector loop region has been removed");
4653 // The middle block is always the last successor of the region.
4654 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4655 }
4656
4658 return const_cast<VPlan *>(this)->getMiddleBlock();
4659 }
4660
4661 /// Return the VPBasicBlock for the preheader of the scalar loop.
4663 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4664 }
4665
4666 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4667 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4668
4669 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4670 /// the original scalar loop.
4671 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4672
4673 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4674 /// exit block.
4676
4677 /// Returns true if \p VPBB is an exit block.
4678 bool isExitBlock(VPBlockBase *VPBB);
4679
4680 /// The trip count of the original loop.
4682 assert(TripCount && "trip count needs to be set before accessing it");
4683 return TripCount;
4684 }
4685
4686 /// Set the trip count assuming it is currently null; if it is not - use
4687 /// resetTripCount().
4688 void setTripCount(VPValue *NewTripCount) {
4689 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4690 TripCount = NewTripCount;
4691 }
4692
4693 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4694 /// the original trip count have been replaced.
4695 void resetTripCount(VPValue *NewTripCount) {
4696 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4697 "TripCount must be set when resetting");
4698 TripCount = NewTripCount;
4699 }
4700
4701 /// The backedge taken count of the original loop.
4703 if (!BackedgeTakenCount)
4704 BackedgeTakenCount = new VPSymbolicValue();
4705 return BackedgeTakenCount;
4706 }
4707 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4708
4709 /// The vector trip count.
4710 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4711
4712 /// Returns the VF of the vector loop region.
4713 VPValue &getVF() { return VF; }
4714 const VPValue &getVF() const { return VF; }
4715
4716 /// Returns the UF of the vector loop region.
4717 VPValue &getUF() { return UF; }
4718
4719 /// Returns VF * UF of the vector loop region.
4720 VPValue &getVFxUF() { return VFxUF; }
4721
4724 }
4725
4726 void addVF(ElementCount VF) { VFs.insert(VF); }
4727
4729 assert(hasVF(VF) && "Cannot set VF not already in plan");
4730 VFs.clear();
4731 VFs.insert(VF);
4732 }
4733
4734 /// Remove \p VF from the plan.
4736 assert(hasVF(VF) && "tried to remove VF not present in plan");
4737 VFs.remove(VF);
4738 }
4739
4740 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4741 bool hasScalableVF() const {
4742 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4743 }
4744
4745 /// Returns an iterator range over all VFs of the plan.
4748 return VFs;
4749 }
4750
/// Returns true if the plan has exactly one VF and that VF is scalar. In
/// builds with assertions, also checks that this agrees with
/// hasVF(ElementCount::getFixed(1)), i.e. that a scalar VF never coexists
/// with other VFs in the same plan.
4751 bool hasScalarVFOnly() const {
4752 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4753 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4754 "Plan with scalar VF should only have a single VF");
4755 return HasScalarVFOnly;
4756 }
4757
4758 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4759
4760 /// Returns the concrete UF of the plan, after unrolling.
4761 unsigned getConcreteUF() const {
4762 assert(UFs.size() == 1 && "Expected a single UF");
4763 return UFs[0];
4764 }
4765
4766 void setUF(unsigned UF) {
4767 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4768 UFs.clear();
4769 UFs.insert(UF);
4770 }
4771
4772 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4773 /// concrete UF.
4774 bool isUnrolled() const { return UFs.size() == 1; }
4775
4776 /// Return a string with the name of the plan and the applicable VFs and UFs.
4777 std::string getName() const;
4778
4779 void setName(const Twine &newName) { Name = newName.str(); }
4780
4781 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4782 /// yet) for \p V.
4784 assert(V && "Trying to get or add the VPIRValue of a null Value");
4785 auto [It, Inserted] = LiveIns.try_emplace(V);
4786 if (Inserted) {
4787 if (auto *CI = dyn_cast<ConstantInt>(V))
4788 It->second = new VPConstantInt(CI);
4789 else
4790 It->second = new VPIRValue(V);
4791 }
4792
4793 assert(isa<VPIRValue>(It->second) &&
4794 "Only VPIRValues should be in mapping");
4795 return It->second;
4796 }
4798 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4799 return getOrAddLiveIn(V->getValue());
4800 }
4801
4802 /// Return a VPIRValue wrapping i1 true.
4803 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4804
4805 /// Return a VPIRValue wrapping i1 false.
4806 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4807
4808 /// Return a VPIRValue wrapping the null value of type \p Ty.
4809 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4810
4811 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4813 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4814 }
4815
4816 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4817 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4818 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4819 }
4820
4821 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4822 /// value.
4824 bool IsSigned = false) {
4825 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4826 }
4827
4828 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4830 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4831 }
4832
4833 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4834 /// otherwise.
4835 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4836
4837 /// Return the list of live-in VPValues available in the VPlan.
4838 auto getLiveIns() const { return LiveIns.values(); }
4839
4840#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4841 /// Print the live-ins of this VPlan to \p O.
4842 void printLiveIns(raw_ostream &O) const;
4843
4844 /// Print this VPlan to \p O.
4845 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4846
4847 /// Print this VPlan in DOT format to \p O.
4848 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4849
4850 /// Dump the plan to stderr (for debugging).
4851 LLVM_DUMP_METHOD void dump() const;
4852#endif
4853
4854 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4855 /// recipes to refer to the clones, and return it.
4857
4858 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4859 /// present. The returned block is owned by the VPlan and deleted once the
4860 /// VPlan is destroyed.
4862 VPRecipeBase *Recipe = nullptr) {
4863 auto *VPB = new VPBasicBlock(Name, Recipe);
4864 CreatedBlocks.push_back(VPB);
4865 return VPB;
4866 }
4867
4868 /// Create a new loop region with \p Name and entry and exiting blocks set
4869 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4870 /// owned by the VPlan and deleted once the VPlan is destroyed.
4871 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4872 VPBlockBase *Entry = nullptr,
4873 VPBlockBase *Exiting = nullptr) {
4874 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4875 : new VPRegionBlock(Name);
4876 CreatedBlocks.push_back(VPB);
4877 return VPB;
4878 }
4879
4880 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4881 /// returned block is owned by the VPlan and deleted once the VPlan is
4882 /// destroyed.
4884 const std::string &Name = "") {
4885 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4886 CreatedBlocks.push_back(VPB);
4887 return VPB;
4888 }
4889
4890 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4891 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4892 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4894
4895 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4896 /// instructions in \p IRBB, except its terminator which is managed by the
4897 /// successors of the block in VPlan. The returned block is owned by the VPlan
4898 /// and deleted once the VPlan is destroyed.
4900
4901 /// Returns true if the VPlan is based on a loop with an early exit. That is
4902 /// the case if the VPlan has either more than one exit block or a single exit
4903 /// block with multiple predecessors (one for the exit via the latch and one
4904 /// via the other early exit).
4905 bool hasEarlyExit() const {
4906 return count_if(ExitBlocks,
4907 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4908 1 ||
4909 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4910 }
4911
4912 /// Returns true if the scalar tail may execute after the vector loop. Note
4913 /// that this relies on unneeded branches to the scalar tail loop being
4914 /// removed.
4915 bool hasScalarTail() const {
4916 return !(!getScalarPreheader()->hasPredecessors() ||
4918 }
4919};
4920
4921#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4922inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4923 Plan.print(OS);
4924 return OS;
4925}
4926#endif
4927
4928} // end namespace llvm
4929
4930#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:570
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3854
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3848
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4213
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4241
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4288
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4243
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4240
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4266
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4224
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4230
iterator end()
Definition VPlan.h:4250
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4248
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4242
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4301
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:786
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
~VPBasicBlock() override
Definition VPlan.h:4234
const_reverse_iterator rbegin() const
Definition VPlan.h:4254
reverse_iterator rend()
Definition VPlan.h:4255
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4228
VPRecipeBase & back()
Definition VPlan.h:4263
const VPRecipeBase & front() const
Definition VPlan.h:4260
const_iterator begin() const
Definition VPlan.h:4249
VPRecipeBase & front()
Definition VPlan.h:4261
const VPRecipeBase & back() const
Definition VPlan.h:4262
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4279
bool empty() const
Definition VPlan.h:4259
const_iterator end() const
Definition VPlan.h:4251
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4274
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4269
reverse_iterator rbegin()
Definition VPlan.h:4253
friend class VPlan
Definition VPlan.h:4214
size_t size() const
Definition VPlan.h:4258
const_reverse_iterator rend() const
Definition VPlan.h:4256
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2782
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2787
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2757
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2777
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2798
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2807
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2764
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2793
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2773
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:82
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:301
VPRegionBlock * getParent()
Definition VPlan.h:174
VPBlocksTy & getPredecessors()
Definition VPlan.h:206
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:203
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:371
void setName(const Twine &newName)
Definition VPlan.h:167
size_t getNumSuccessors() const
Definition VPlan.h:220
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:202
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:224
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:323
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:161
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:259
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:336
size_t getNumPredecessors() const
Definition VPlan.h:221
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:292
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:224
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:329
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:205
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:159
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:196
const VPRegionBlock * getParent() const
Definition VPlan.h:175
const std::string & getName() const
Definition VPlan.h:165
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:311
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:249
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:283
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:216
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:243
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:308
friend class VPBlockUtils
Definition VPlan.h:83
unsigned getVPBlockID() const
Definition VPlan.h:172
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:350
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:315
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:151
VPBlocksTy & getSuccessors()
Definition VPlan.h:200
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:216
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:272
void setParent(VPRegionBlock *P)
Definition VPlan.h:185
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:265
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:210
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:199
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3276
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3260
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3284
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3257
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3788
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
Definition VPlan.h:3790
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3816
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3796
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3823
VPIRValue * getStartValue() const
Returns the start value of the canonical induction.
Definition VPlan.h:3810
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3813
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3804
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3830
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3886
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3880
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3898
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3892
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3905
~VPCurrentIterationPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPIRValue * getStartValue() const
Definition VPlan.h:4005
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3997
VPValue * getStepValue() const
Definition VPlan.h:4006
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3969
Type * getScalarType() const
Definition VPlan.h:4003
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3985
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4009
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3977
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3763
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3768
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3754
const SCEV * getSCEV() const
Definition VPlan.h:3774
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3759
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3411
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3393
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3375
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3363
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3349
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3341
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3345
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3405
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3343
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2266
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2270
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2283
static bool classof(const VPValue *V)
Definition VPlan.h:2280
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2306
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2311
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2295
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2303
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2276
VPValue * getStartValue() const
Definition VPlan.h:2298
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2315
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2018
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2035
unsigned getOpcode() const
Definition VPlan.h:2031
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2011
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4366
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:461
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4390
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4380
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4367
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:486
Class to record and manage LLVM IR flags.
Definition VPlan.h:671
FastMathFlagsTy FMFs
Definition VPlan.h:759
ReductionFlagsTy ReductionFlags
Definition VPlan.h:761
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:852
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:832
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:818
WrapFlagsTy WrapFlags
Definition VPlan.h:753
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:811
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:976
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1026
TruncFlagsTy TruncFlags
Definition VPlan.h:754
CmpInst::Predicate getPredicate() const
Definition VPlan.h:948
uint8_t AllFlags[2]
Definition VPlan.h:762
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:984
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:857
ExactFlagsTy ExactFlags
Definition VPlan.h:756
bool hasNoSignedWrap() const
Definition VPlan.h:1003
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1014
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:823
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:828
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:837
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:806
uint8_t GEPFlagsStorage
Definition VPlan.h:757
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:842
bool isNonNeg() const
Definition VPlan.h:986
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:966
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:971
DisjointFlagsTy DisjointFlags
Definition VPlan.h:755
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:956
bool hasNoUnsignedWrap() const
Definition VPlan.h:992
FCmpFlagsTy FCmpFlags
Definition VPlan.h:760
NonNegFlagsTy NonNegFlags
Definition VPlan.h:758
bool isReductionInLoop() const
Definition VPlan.h:1032
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:868
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:905
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:847
uint8_t CmpPredStorage
Definition VPlan.h:752
RecurKind getRecurKind() const
Definition VPlan.h:1020
VPIRFlags(Instruction &I)
Definition VPlan.h:768
Instruction & getInstruction() const
Definition VPlan.h:1682
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1690
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1669
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1696
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1684
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1657
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1136
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1172
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1144
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1156
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1487
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1529
static bool classof(const VPUser *R)
Definition VPlan.h:1514
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1495
Type * getResultType() const
Definition VPlan.h:1535
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1518
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1191
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1419
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1439
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1361
@ ExtractLastActive
Extracts the lane from the first operand corresponding to the last active (non-zero) lane in the mask...
Definition VPlan.h:1300
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1293
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1307
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1238
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1283
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1296
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1235
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1287
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1230
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1227
@ VScale
Returns the value for vscale.
Definition VPlan.h:1303
@ CanonicalIVIncrementForPart
Definition VPlan.h:1211
bool hasResult() const
Definition VPlan.h:1385
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1442
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1424
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1464
unsigned getOpcode() const
Definition VPlan.h:1369
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1467
friend class VPlanSlp
Definition VPlan.h:1192
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1433
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1409
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2894
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2900
static bool classof(const VPUser *U)
Definition VPlan.h:2876
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2842
Instruction * getInsertPos() const
Definition VPlan.h:2898
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2871
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2896
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2888
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2917
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2882
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2969
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:2997
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:2991
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3004
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2984
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2971
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
Definition VPlan.h:2927
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2954
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2937
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2948
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2929
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1547
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1576
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1571
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4357
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1596
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1556
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1581
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1585
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3468
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3450
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3461
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3446
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:388
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:532
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4518
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:543
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:463
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:537
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:512
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:390
const VPBasicBlock * getParent() const
Definition VPlan.h:464
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:517
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:509
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCanonicalIVPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCanonicalIVPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:406
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:453
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:232
LLVM_ABI_FOR_TEST VPRecipeValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:143
friend class VPValue
Definition VPlanValue.h:233
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3152
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3131
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3155
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3142
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2718
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2704
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2683
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2697
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2730
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2712
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2671
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2721
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2735
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2727
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2715
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3020
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3029
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3094
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3063
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3078
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3105
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3107
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3090
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3043
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3092
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3049
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3096
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3103
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3098
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3057
static bool classof(const VPUser *U)
Definition VPlan.h:3068
static bool classof(const VPValue *VPV)
Definition VPlan.h:3073
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3112
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4401
const VPBlockBase * getEntry() const
Definition VPlan.h:4437
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4512
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4469
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4454
VPBlockBase * getExiting()
Definition VPlan.h:4450
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4499
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4442
const Type * getCanonicalIVType() const
Definition VPlan.h:4513
const VPBlockBase * getExiting() const
Definition VPlan.h:4449
VPBlockBase * getEntry()
Definition VPlan.h:4438
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4507
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4462
friend class VPlan
Definition VPlan.h:4402
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4433
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3174
bool isSingleScalar() const
Definition VPlan.h:3215
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3182
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3227
bool isPredicated() const
Definition VPlan.h:3217
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3196
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3220
unsigned getOpcode() const
Definition VPlan.h:3244
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3239
VPValue * getStepValue() const
Definition VPlan.h:4071
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4065
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4041
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4053
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4079
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4075
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4034
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4084
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:589
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:595
static bool classof(const VPValue *V)
Definition VPlan.h:644
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:657
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:599
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:660
static bool classof(const VPUser *U)
Definition VPlan.h:649
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:591
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1124
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:258
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1446
operand_range operands()
Definition VPlanValue.h:326
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:302
unsigned getNumOperands() const
Definition VPlanValue.h:296
operand_iterator op_end()
Definition VPlanValue.h:324
operand_iterator op_begin()
Definition VPlanValue.h:322
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:297
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:277
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:320
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:319
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:137
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
friend class VPRecipeValue
Definition VPlanValue.h:50
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:71
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:172
unsigned getNumUsers() const
Definition VPlanValue.h:104
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2144
VPValue * getVFValue() const
Definition VPlan.h:2133
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2130
int64_t getStride() const
Definition VPlan.h:2131
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2165
VPValue * getOffset() const
Definition VPlan.h:2134
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2158
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2120
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2151
VPValue * getPointer() const
Definition VPlan.h:2132
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2202
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2204
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2211
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2189
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2227
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2218
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1950
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1957
const_operand_range args() const
Definition VPlan.h:1991
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1972
operand_range args()
Definition VPlan.h:1990
Function * getCalledScalarFunction() const
Definition VPlan.h:1986
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3941
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3928
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3923
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1798
Instruction::CastOps getOpcode() const
Definition VPlan.h:1836
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1839
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1806
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1821
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2082
Type * getSourceElementType() const
Definition VPlan.h:2087
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2090
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2074
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2060
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2398
static bool classof(const VPValue *V)
Definition VPlan.h:2346
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2365
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2383
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2358
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2373
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2376
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2334
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2361
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2381
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2390
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2341
const VPValue * getVFValue() const
Definition VPlan.h:2368
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2351
const VPValue * getStepValue() const
Definition VPlan.h:2362
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2459
const TruncInst * getTruncInst() const
Definition VPlan.h:2475
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2453
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2463
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2445
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2419
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2474
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2428
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2490
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2470
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2483
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1850
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1881
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1921
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1930
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1867
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1936
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1902
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1933
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1924
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3499
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3496
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3539
static bool classof(const VPUser *U)
Definition VPlan.h:3533
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3562
Instruction & Ingredient
Definition VPlan.h:3487
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3522
Instruction & getIngredient() const
Definition VPlan.h:3570
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3493
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3526
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3553
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3490
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3549
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3509
void setMask(VPValue *Mask)
Definition VPlan.h:3501
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3559
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3546
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3543
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2589
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2555
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2563
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2517
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2526
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2507
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1742
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1762
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1789
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1746
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1754
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1779
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4531
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4835
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1134
friend class VPSlotTracker
Definition VPlan.h:4533
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1110
bool hasVF(ElementCount VF) const
Definition VPlan.h:4740
LLVMContext & getContext() const
Definition VPlan.h:4722
VPBasicBlock * getEntry()
Definition VPlan.h:4623
void setName(const Twine &newName)
Definition VPlan.h:4779
bool hasScalableVF() const
Definition VPlan.h:4741
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4720
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4713
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4681
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4702
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4747
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:905
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:883
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4797
const VPValue & getVF() const
Definition VPlan.h:4714
VPValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4717
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:913
const VPBasicBlock * getEntry() const
Definition VPlan.h:4624
friend class VPlanPrinter
Definition VPlan.h:4532
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4806
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4829
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4812
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4883
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1246
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4838
bool hasUF(unsigned UF) const
Definition VPlan.h:4758
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4671
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4710
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4707
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4783
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4809
void setVF(ElementCount VF)
Definition VPlan.h:4728
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4774
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1033
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4905
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1015
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4761
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4823
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4657
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4688
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4695
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4648
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4612
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4861
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1252
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4735
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4803
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4871
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1140
bool hasScalarVFOnly() const
Definition VPlan.h:4751
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4662
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:923
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1093
void addVF(ElementCount VF)
Definition VPlan.h:4726
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4667
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1049
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4628
void setUF(unsigned UF)
Definition VPlan.h:4766
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4915
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1181
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4605
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4817
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2507
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:189
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:4147
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:831
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:841
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2644
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:313
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2642
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:78
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:4178
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:4192
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:4197
static bool isPossible(SrcTy R)
Definition VPlan.h:4179
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:4109
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:4130
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:4111
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:4114
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:4101
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2636
Possible variants of a reduction.
Definition VPlan.h:2634
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2639
unsigned VFScaleFactor
Definition VPlan.h:2640
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:201
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2605
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2617
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2596
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:703
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:708
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:698
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:691
PHINode & getIRPhi()
Definition VPlan.h:1723
VPIRPhi(PHINode &PN)
Definition VPlan.h:1716
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1718
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1734
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:183
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:141
static bool classof(const VPUser *U)
Definition VPlan.h:1615
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1611
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1630
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1645
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1625
static bool classof(const VPValue *V)
Definition VPlan.h:1620
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1078
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1111
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1084
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1079
static bool classof(const VPValue *V)
Definition VPlan.h:1104
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1099
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:223
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3618
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3631
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3619
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3641
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3576
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3598
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3577
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3586
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3703
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3715
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3704
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3728
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3718
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3659
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3678
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3669
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3684
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3660