LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class VPlanSlp;
69class Value;
71
72struct VPCostContext;
73
74namespace Intrinsic {
75typedef unsigned ID;
76}
77
78using VPlanPtr = std::unique_ptr<VPlan>;
79
80/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
81/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
83 friend class VPBlockUtils;
84
85 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
86
87 /// An optional name for the block.
88 std::string Name;
89
90 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
91 /// it is a topmost VPBlockBase.
92 VPRegionBlock *Parent = nullptr;
93
94 /// List of predecessor blocks.
96
97 /// List of successor blocks.
99
100 /// VPlan containing the block. Can only be set on the entry block of the
101 /// plan.
102 VPlan *Plan = nullptr;
103
104 /// Add \p Successor as the last successor to this block.
105 void appendSuccessor(VPBlockBase *Successor) {
106 assert(Successor && "Cannot add nullptr successor!");
107 Successors.push_back(Successor);
108 }
109
110 /// Add \p Predecessor as the last predecessor to this block.
111 void appendPredecessor(VPBlockBase *Predecessor) {
112 assert(Predecessor && "Cannot add nullptr predecessor!");
113 Predecessors.push_back(Predecessor);
114 }
115
116 /// Remove \p Predecessor from the predecessors of this block.
117 void removePredecessor(VPBlockBase *Predecessor) {
118 auto Pos = find(Predecessors, Predecessor);
119 assert(Pos && "Predecessor does not exist");
120 Predecessors.erase(Pos);
121 }
122
123 /// Remove \p Successor from the successors of this block.
124 void removeSuccessor(VPBlockBase *Successor) {
125 auto Pos = find(Successors, Successor);
126 assert(Pos && "Successor does not exist");
127 Successors.erase(Pos);
128 }
129
130 /// This function replaces one predecessor with another, useful when
131 /// trying to replace an old block in the CFG with a new one.
132 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
133 auto I = find(Predecessors, Old);
134 assert(I != Predecessors.end());
135 assert(Old->getParent() == New->getParent() &&
136 "replaced predecessor must have the same parent");
137 *I = New;
138 }
139
140 /// This function replaces one successor with another, useful when
141 /// trying to replace an old block in the CFG with a new one.
142 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
143 auto I = find(Successors, Old);
144 assert(I != Successors.end());
145 assert(Old->getParent() == New->getParent() &&
146 "replaced successor must have the same parent");
147 *I = New;
148 }
149
150protected:
151 VPBlockBase(const unsigned char SC, const std::string &N)
152 : SubclassID(SC), Name(N) {}
153
154public:
155 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
156 /// that are actually instantiated. Values of this enumeration are kept in the
157 /// SubclassID field of the VPBlockBase objects. They are used for concrete
158 /// type identification.
159 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
160
162
163 virtual ~VPBlockBase() = default;
164
165 const std::string &getName() const { return Name; }
166
167 void setName(const Twine &newName) { Name = newName.str(); }
168
169 /// \return an ID for the concrete type of this object.
170 /// This is used to implement the classof checks. This should not be used
171 /// for any other purpose, as the values may change as LLVM evolves.
172 unsigned getVPBlockID() const { return SubclassID; }
173
174 VPRegionBlock *getParent() { return Parent; }
175 const VPRegionBlock *getParent() const { return Parent; }
176
177 /// \return A pointer to the plan containing the current block.
178 VPlan *getPlan();
179 const VPlan *getPlan() const;
180
181 /// Sets the pointer of the plan containing the block. The block must be the
182 /// entry block into the VPlan.
183 void setPlan(VPlan *ParentPlan);
184
185 void setParent(VPRegionBlock *P) { Parent = P; }
186
187 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
188 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
189 /// VPBlockBase is a VPBasicBlock, it is returned.
190 const VPBasicBlock *getEntryBasicBlock() const;
191 VPBasicBlock *getEntryBasicBlock();
192
/// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
194 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
195 /// VPBlockBase is a VPBasicBlock, it is returned.
196 const VPBasicBlock *getExitingBasicBlock() const;
197 VPBasicBlock *getExitingBasicBlock();
198
199 const VPBlocksTy &getSuccessors() const { return Successors; }
200 VPBlocksTy &getSuccessors() { return Successors; }
201
204
205 const VPBlocksTy &getPredecessors() const { return Predecessors; }
206 VPBlocksTy &getPredecessors() { return Predecessors; }
207
208 /// \return the successor of this VPBlockBase if it has a single successor.
209 /// Otherwise return a null pointer.
211 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
212 }
213
214 /// \return the predecessor of this VPBlockBase if it has a single
215 /// predecessor. Otherwise return a null pointer.
217 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
218 }
219
220 size_t getNumSuccessors() const { return Successors.size(); }
221 size_t getNumPredecessors() const { return Predecessors.size(); }
222
223 /// Returns true if this block has any predecessors.
224 bool hasPredecessors() const { return !Predecessors.empty(); }
225
226 /// An Enclosing Block of a block B is any block containing B, including B
227 /// itself. \return the closest enclosing block starting from "this", which
228 /// has successors. \return the root enclosing block if all enclosing blocks
229 /// have no successors.
230 VPBlockBase *getEnclosingBlockWithSuccessors();
231
232 /// \return the closest enclosing block starting from "this", which has
233 /// predecessors. \return the root enclosing block if all enclosing blocks
234 /// have no predecessors.
235 VPBlockBase *getEnclosingBlockWithPredecessors();
236
237 /// \return the successors either attached directly to this VPBlockBase or, if
238 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
239 /// successors of its own, search recursively for the first enclosing
240 /// VPRegionBlock that has successors and return them. If no such
241 /// VPRegionBlock exists, return the (empty) successors of the topmost
242 /// VPBlockBase reached.
244 return getEnclosingBlockWithSuccessors()->getSuccessors();
245 }
246
247 /// \return the hierarchical successor of this VPBlockBase if it has a single
248 /// hierarchical successor. Otherwise return a null pointer.
250 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
251 }
252
253 /// \return the predecessors either attached directly to this VPBlockBase or,
254 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
255 /// predecessors of its own, search recursively for the first enclosing
256 /// VPRegionBlock that has predecessors and return them. If no such
257 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
258 /// VPBlockBase reached.
260 return getEnclosingBlockWithPredecessors()->getPredecessors();
261 }
262
263 /// \return the hierarchical predecessor of this VPBlockBase if it has a
264 /// single hierarchical predecessor. Otherwise return a null pointer.
268
269 /// Set a given VPBlockBase \p Successor as the single successor of this
270 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
271 /// This VPBlockBase must have no successors.
273 assert(Successors.empty() && "Setting one successor when others exist.");
274 assert(Successor->getParent() == getParent() &&
275 "connected blocks must have the same parent");
276 appendSuccessor(Successor);
277 }
278
279 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
280 /// successors of this VPBlockBase. This VPBlockBase is not added as
281 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
282 /// successors.
283 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
284 assert(Successors.empty() && "Setting two successors when others exist.");
285 appendSuccessor(IfTrue);
286 appendSuccessor(IfFalse);
287 }
288
289 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
290 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
291 /// as successor of any VPBasicBlock in \p NewPreds.
293 assert(Predecessors.empty() && "Block predecessors already set.");
294 for (auto *Pred : NewPreds)
295 appendPredecessor(Pred);
296 }
297
/// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
/// This VPBlockBase must have no successors. This VPBlockBase is not added
/// as predecessor of any VPBasicBlock in \p NewSuccs.
302 assert(Successors.empty() && "Block successors already set.");
303 for (auto *Succ : NewSuccs)
304 appendSuccessor(Succ);
305 }
306
/// Remove all the predecessors of this block.
308 void clearPredecessors() { Predecessors.clear(); }
309
310 /// Remove all the successors of this block.
311 void clearSuccessors() { Successors.clear(); }
312
313 /// Swap predecessors of the block. The block must have exactly 2
314 /// predecessors.
316 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
317 std::swap(Predecessors[0], Predecessors[1]);
318 }
319
320 /// Swap successors of the block. The block must have exactly 2 successors.
321 // TODO: This should be part of introducing conditional branch recipes rather
322 // than being independent.
324 assert(Successors.size() == 2 && "must have 2 successors to swap");
325 std::swap(Successors[0], Successors[1]);
326 }
327
328 /// Returns the index for \p Pred in the blocks predecessors list.
329 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
330 assert(count(Predecessors, Pred) == 1 &&
331 "must have Pred exactly once in Predecessors");
332 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
333 }
334
335 /// Returns the index for \p Succ in the blocks successor list.
336 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
337 assert(count(Successors, Succ) == 1 &&
338 "must have Succ exactly once in Successors");
339 return std::distance(Successors.begin(), find(Successors, Succ));
340 }
341
342 /// The method which generates the output IR that correspond to this
343 /// VPBlockBase, thereby "executing" the VPlan.
344 virtual void execute(VPTransformState *State) = 0;
345
346 /// Return the cost of the block.
348
349#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
350 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
351 OS << getName();
352 }
353
/// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
/// with \p Indent. \p SlotTracker is used to print unnamed VPValues using
/// consecutive numbers.
357 ///
358 /// Note that the numbering is applied to the whole VPlan, so printing
359 /// individual blocks is consistent with the whole VPlan printing.
360 virtual void print(raw_ostream &O, const Twine &Indent,
361 VPSlotTracker &SlotTracker) const = 0;
362
/// Print plain-text dump of this VPBlockBase to \p O.
364 void print(raw_ostream &O) const;
365
366 /// Print the successors of this block to \p O, prefixing all lines with \p
367 /// Indent.
368 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
369
370 /// Dump this VPBlockBase to dbgs().
371 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
372#endif
373
/// Clone the current block and its recipes without updating the operands of
/// the cloned recipes, including all blocks in the single-entry single-exit
/// region for VPRegionBlocks.
377 virtual VPBlockBase *clone() = 0;
378};
379
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
/// and is responsible for deleting its defined values. Single-value
/// recipes must inherit from VPSingleDefRecipe instead of inheriting from
/// both VPRecipeBase and VPValue separately.
386 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
387 public VPDef,
388 public VPUser {
389 friend VPBasicBlock;
390 friend class VPBlockUtils;
391
392 /// Subclass identifier (for isa/dyn_cast).
393 const unsigned char SubclassID;
394
395 /// Each VPRecipe belongs to a single VPBasicBlock.
396 VPBasicBlock *Parent = nullptr;
397
398 /// The debug location for the recipe.
399 DebugLoc DL;
400
401public:
402 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
403 /// that is actually instantiated. Values of this enumeration are kept in the
404 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
405 /// type identification.
406 using VPRecipeTy = enum {
407 VPBranchOnMaskSC,
408 VPDerivedIVSC,
409 VPExpandSCEVSC,
410 VPExpressionSC,
411 VPIRInstructionSC,
412 VPInstructionSC,
413 VPInterleaveEVLSC,
414 VPInterleaveSC,
415 VPReductionEVLSC,
416 VPReductionSC,
417 VPReplicateSC,
418 VPScalarIVStepsSC,
419 VPVectorPointerSC,
420 VPVectorEndPointerSC,
421 VPWidenCallSC,
422 VPWidenCanonicalIVSC,
423 VPWidenCastSC,
424 VPWidenGEPSC,
425 VPWidenIntrinsicSC,
426 VPWidenLoadEVLSC,
427 VPWidenLoadSC,
428 VPWidenStoreEVLSC,
429 VPWidenStoreSC,
430 VPWidenSC,
431 VPBlendSC,
432 VPHistogramSC,
433 // START: Phi-like recipes. Need to be kept together.
434 VPWidenPHISC,
435 VPPredInstPHISC,
436 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
437 // VPHeaderPHIRecipe need to be kept together.
438 VPCanonicalIVPHISC,
439 VPCurrentIterationPHISC,
440 VPActiveLaneMaskPHISC,
441 VPFirstOrderRecurrencePHISC,
442 VPWidenIntOrFpInductionSC,
443 VPWidenPointerInductionSC,
444 VPReductionPHISC,
445 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
446 // END: Phi-like recipes
447 VPFirstPHISC = VPWidenPHISC,
448 VPFirstHeaderPHISC = VPCanonicalIVPHISC,
449 VPLastHeaderPHISC = VPReductionPHISC,
450 VPLastPHISC = VPReductionPHISC,
451 };
452
453 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
455 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
456
457 ~VPRecipeBase() override = default;
458
459 /// Clone the current recipe.
460 virtual VPRecipeBase *clone() = 0;
461
462 /// \return the VPBasicBlock which this VPRecipe belongs to.
463 VPBasicBlock *getParent() { return Parent; }
464 const VPBasicBlock *getParent() const { return Parent; }
465
466 /// \return the VPRegionBlock which the recipe belongs to.
467 VPRegionBlock *getRegion();
468 const VPRegionBlock *getRegion() const;
469
470 /// The method which generates the output IR instructions that correspond to
471 /// this VPRecipe, thereby "executing" the VPlan.
472 virtual void execute(VPTransformState &State) = 0;
473
474 /// Return the cost of this recipe, taking into account if the cost
475 /// computation should be skipped and the ForceTargetInstructionCost flag.
476 /// Also takes care of printing the cost for debugging.
478
479 /// Insert an unlinked recipe into a basic block immediately before
480 /// the specified recipe.
481 void insertBefore(VPRecipeBase *InsertPos);
482 /// Insert an unlinked recipe into \p BB immediately before the insertion
483 /// point \p IP;
484 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
485
486 /// Insert an unlinked Recipe into a basic block immediately after
487 /// the specified Recipe.
488 void insertAfter(VPRecipeBase *InsertPos);
489
490 /// Unlink this recipe from its current VPBasicBlock and insert it into
491 /// the VPBasicBlock that MovePos lives in, right after MovePos.
492 void moveAfter(VPRecipeBase *MovePos);
493
494 /// Unlink this recipe and insert into BB before I.
495 ///
496 /// \pre I is a valid iterator into BB.
497 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
498
499 /// This method unlinks 'this' from the containing basic block, but does not
500 /// delete it.
501 void removeFromParent();
502
503 /// This method unlinks 'this' from the containing basic block and deletes it.
504 ///
505 /// \returns an iterator pointing to the element after the erased one
507
508 /// \return an ID for the concrete type of this object.
509 unsigned getVPRecipeID() const { return SubclassID; }
510
511 /// Method to support type inquiry through isa, cast, and dyn_cast.
512 static inline bool classof(const VPDef *D) {
513 // All VPDefs are also VPRecipeBases.
514 return true;
515 }
516
517 static inline bool classof(const VPUser *U) { return true; }
518
519 /// Returns true if the recipe may have side-effects.
520 bool mayHaveSideEffects() const;
521
522 /// Returns true for PHI-like recipes.
523 bool isPhi() const;
524
525 /// Returns true if the recipe may read from memory.
526 bool mayReadFromMemory() const;
527
528 /// Returns true if the recipe may write to memory.
529 bool mayWriteToMemory() const;
530
531 /// Returns true if the recipe may read from or write to memory.
532 bool mayReadOrWriteMemory() const {
534 }
535
536 /// Returns the debug location of the recipe.
537 DebugLoc getDebugLoc() const { return DL; }
538
539 /// Return true if the recipe is a scalar cast.
540 bool isScalarCast() const;
541
542 /// Set the recipe's debug location to \p NewDL.
543 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
544
545#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
546 /// Dump the recipe to stderr (for debugging).
547 LLVM_ABI_FOR_TEST void dump() const;
548
549 /// Print the recipe, delegating to printRecipe().
550 void print(raw_ostream &O, const Twine &Indent,
552#endif
553
554protected:
555 /// Compute the cost of this recipe either using a recipe's specialized
556 /// implementation or using the legacy cost model and the underlying
557 /// instructions.
558 virtual InstructionCost computeCost(ElementCount VF,
559 VPCostContext &Ctx) const;
560
561#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
562 /// Each concrete VPRecipe prints itself, without printing common information,
563 /// like debug info or metadata.
564 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
565 VPSlotTracker &SlotTracker) const = 0;
566#endif
567};
568
569// Helper macro to define common classof implementations for recipes.
570#define VP_CLASSOF_IMPL(VPRecipeID) \
571 static inline bool classof(const VPRecipeBase *R) { \
572 return R->getVPRecipeID() == VPRecipeID; \
573 } \
574 static inline bool classof(const VPValue *V) { \
575 auto *R = V->getDefiningRecipe(); \
576 return R && R->getVPRecipeID() == VPRecipeID; \
577 } \
578 static inline bool classof(const VPUser *U) { \
579 auto *R = dyn_cast<VPRecipeBase>(U); \
580 return R && R->getVPRecipeID() == VPRecipeID; \
581 } \
582 static inline bool classof(const VPSingleDefRecipe *R) { \
583 return R->getVPRecipeID() == VPRecipeID; \
584 }
585
/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one
/// or more output IR instructions that define a single result VPValue.
/// Note that VPRecipeBase must be inherited from before VPValue.
590public:
591 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
593 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
594
595 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
597 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
598
599 static inline bool classof(const VPRecipeBase *R) {
600 switch (R->getVPRecipeID()) {
601 case VPRecipeBase::VPDerivedIVSC:
602 case VPRecipeBase::VPExpandSCEVSC:
603 case VPRecipeBase::VPExpressionSC:
604 case VPRecipeBase::VPInstructionSC:
605 case VPRecipeBase::VPReductionEVLSC:
606 case VPRecipeBase::VPReductionSC:
607 case VPRecipeBase::VPReplicateSC:
608 case VPRecipeBase::VPScalarIVStepsSC:
609 case VPRecipeBase::VPVectorPointerSC:
610 case VPRecipeBase::VPVectorEndPointerSC:
611 case VPRecipeBase::VPWidenCallSC:
612 case VPRecipeBase::VPWidenCanonicalIVSC:
613 case VPRecipeBase::VPWidenCastSC:
614 case VPRecipeBase::VPWidenGEPSC:
615 case VPRecipeBase::VPWidenIntrinsicSC:
616 case VPRecipeBase::VPWidenSC:
617 case VPRecipeBase::VPBlendSC:
618 case VPRecipeBase::VPPredInstPHISC:
619 case VPRecipeBase::VPCanonicalIVPHISC:
620 case VPRecipeBase::VPCurrentIterationPHISC:
621 case VPRecipeBase::VPActiveLaneMaskPHISC:
622 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
623 case VPRecipeBase::VPWidenPHISC:
624 case VPRecipeBase::VPWidenIntOrFpInductionSC:
625 case VPRecipeBase::VPWidenPointerInductionSC:
626 case VPRecipeBase::VPReductionPHISC:
627 return true;
628 case VPRecipeBase::VPBranchOnMaskSC:
629 case VPRecipeBase::VPInterleaveEVLSC:
630 case VPRecipeBase::VPInterleaveSC:
631 case VPRecipeBase::VPIRInstructionSC:
632 case VPRecipeBase::VPWidenLoadEVLSC:
633 case VPRecipeBase::VPWidenLoadSC:
634 case VPRecipeBase::VPWidenStoreEVLSC:
635 case VPRecipeBase::VPWidenStoreSC:
636 case VPRecipeBase::VPHistogramSC:
637 // TODO: Widened stores don't define a value, but widened loads do. Split
638 // the recipes to be able to make widened loads VPSingleDefRecipes.
639 return false;
640 }
641 llvm_unreachable("Unhandled VPRecipeID");
642 }
643
644 static inline bool classof(const VPValue *V) {
645 auto *R = V->getDefiningRecipe();
646 return R && classof(R);
647 }
648
649 static inline bool classof(const VPUser *U) {
650 auto *R = dyn_cast<VPRecipeBase>(U);
651 return R && classof(R);
652 }
653
654 VPSingleDefRecipe *clone() override = 0;
655
656 /// Returns the underlying instruction.
663
664#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
665 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
667#endif
668};
669
670/// Class to record and manage LLVM IR flags.
673 enum class OperationType : unsigned char {
674 Cmp,
675 FCmp,
676 OverflowingBinOp,
677 Trunc,
678 DisjointOp,
679 PossiblyExactOp,
680 GEPOp,
681 FPMathOp,
682 NonNegOp,
683 ReductionOp,
684 Other
685 };
686
687public:
688 struct WrapFlagsTy {
689 char HasNUW : 1;
690 char HasNSW : 1;
691
693 };
694
696 char HasNUW : 1;
697 char HasNSW : 1;
698
700 };
701
706
708 char NonNeg : 1;
709 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
710 };
711
712private:
713 struct ExactFlagsTy {
714 char IsExact : 1;
715 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
716 };
717 struct FastMathFlagsTy {
718 char AllowReassoc : 1;
719 char NoNaNs : 1;
720 char NoInfs : 1;
721 char NoSignedZeros : 1;
722 char AllowReciprocal : 1;
723 char AllowContract : 1;
724 char ApproxFunc : 1;
725
726 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
727 };
728 /// Holds both the predicate and fast-math flags for floating-point
729 /// comparisons.
730 struct FCmpFlagsTy {
731 uint8_t CmpPredStorage;
732 FastMathFlagsTy FMFs;
733 };
734 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
735 struct ReductionFlagsTy {
736 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
737 // additional kinds.
738 unsigned char Kind : 6;
739 // TODO: Derive order/in-loop from plan and remove here.
740 unsigned char IsOrdered : 1;
741 unsigned char IsInLoop : 1;
742 FastMathFlagsTy FMFs;
743
744 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
745 FastMathFlags FMFs)
746 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
747 IsInLoop(IsInLoop), FMFs(FMFs) {}
748 };
749
750 OperationType OpType;
751
752 union {
757 ExactFlagsTy ExactFlags;
760 FastMathFlagsTy FMFs;
761 FCmpFlagsTy FCmpFlags;
762 ReductionFlagsTy ReductionFlags;
764 };
765
766public:
767 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
768
770 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
771 OpType = OperationType::FCmp;
773 FCmp->getPredicate());
774 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
775 FCmpFlags.FMFs = FCmp->getFastMathFlags();
776 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
777 OpType = OperationType::Cmp;
779 Op->getPredicate());
780 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
781 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
782 OpType = OperationType::DisjointOp;
783 DisjointFlags.IsDisjoint = Op->isDisjoint();
784 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
785 OpType = OperationType::OverflowingBinOp;
786 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
787 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
788 OpType = OperationType::Trunc;
789 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
790 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
791 OpType = OperationType::PossiblyExactOp;
792 ExactFlags.IsExact = Op->isExact();
793 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
794 OpType = OperationType::GEPOp;
795 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
796 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
797 "wrap flags truncated");
798 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
799 OpType = OperationType::NonNegOp;
800 NonNegFlags.NonNeg = PNNI->hasNonNeg();
801 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
802 OpType = OperationType::FPMathOp;
803 FMFs = Op->getFastMathFlags();
804 }
805 }
806
807 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
809 assert(getPredicate() == Pred && "predicate truncated");
810 }
811
813 : OpType(OperationType::FCmp), AllFlags() {
815 assert(getPredicate() == Pred && "predicate truncated");
816 FCmpFlags.FMFs = FMFs;
817 }
818
820 : OpType(OperationType::OverflowingBinOp), AllFlags() {
821 this->WrapFlags = WrapFlags;
822 }
823
825 : OpType(OperationType::Trunc), AllFlags() {
826 this->TruncFlags = TruncFlags;
827 }
828
829 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
830 this->FMFs = FMFs;
831 }
832
834 : OpType(OperationType::DisjointOp), AllFlags() {
835 this->DisjointFlags = DisjointFlags;
836 }
837
839 : OpType(OperationType::NonNegOp), AllFlags() {
840 this->NonNegFlags = NonNegFlags;
841 }
842
843 VPIRFlags(ExactFlagsTy ExactFlags)
844 : OpType(OperationType::PossiblyExactOp), AllFlags() {
845 this->ExactFlags = ExactFlags;
846 }
847
849 : OpType(OperationType::GEPOp), AllFlags() {
850 GEPFlagsStorage = GEPFlags.getRaw();
851 }
852
853 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
854 : OpType(OperationType::ReductionOp), AllFlags() {
855 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
856 }
857
859 OpType = Other.OpType;
860 AllFlags[0] = Other.AllFlags[0];
861 AllFlags[1] = Other.AllFlags[1];
862 }
863
864 /// Only keep flags also present in \p Other. \p Other must have the same
865 /// OpType as the current object.
866 void intersectFlags(const VPIRFlags &Other);
867
868 /// Drop all poison-generating flags.
870 // NOTE: This needs to be kept in-sync with
871 // Instruction::dropPoisonGeneratingFlags.
872 switch (OpType) {
873 case OperationType::OverflowingBinOp:
874 WrapFlags.HasNUW = false;
875 WrapFlags.HasNSW = false;
876 break;
877 case OperationType::Trunc:
878 TruncFlags.HasNUW = false;
879 TruncFlags.HasNSW = false;
880 break;
881 case OperationType::DisjointOp:
882 DisjointFlags.IsDisjoint = false;
883 break;
884 case OperationType::PossiblyExactOp:
885 ExactFlags.IsExact = false;
886 break;
887 case OperationType::GEPOp:
888 GEPFlagsStorage = 0;
889 break;
890 case OperationType::FPMathOp:
891 case OperationType::FCmp:
892 case OperationType::ReductionOp:
893 getFMFsRef().NoNaNs = false;
894 getFMFsRef().NoInfs = false;
895 break;
896 case OperationType::NonNegOp:
897 NonNegFlags.NonNeg = false;
898 break;
899 case OperationType::Cmp:
900 case OperationType::Other:
901 break;
902 }
903 }
904
905 /// Apply the IR flags to \p I.
906 void applyFlags(Instruction &I) const {
907 switch (OpType) {
908 case OperationType::OverflowingBinOp:
909 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
910 I.setHasNoSignedWrap(WrapFlags.HasNSW);
911 break;
912 case OperationType::Trunc:
913 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
914 I.setHasNoSignedWrap(TruncFlags.HasNSW);
915 break;
916 case OperationType::DisjointOp:
917 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
918 break;
919 case OperationType::PossiblyExactOp:
920 I.setIsExact(ExactFlags.IsExact);
921 break;
922 case OperationType::GEPOp:
923 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
925 break;
926 case OperationType::FPMathOp:
927 case OperationType::FCmp: {
928 const FastMathFlagsTy &F = getFMFsRef();
929 I.setHasAllowReassoc(F.AllowReassoc);
930 I.setHasNoNaNs(F.NoNaNs);
931 I.setHasNoInfs(F.NoInfs);
932 I.setHasNoSignedZeros(F.NoSignedZeros);
933 I.setHasAllowReciprocal(F.AllowReciprocal);
934 I.setHasAllowContract(F.AllowContract);
935 I.setHasApproxFunc(F.ApproxFunc);
936 break;
937 }
938 case OperationType::NonNegOp:
939 I.setNonNeg(NonNegFlags.NonNeg);
940 break;
941 case OperationType::ReductionOp:
942 llvm_unreachable("reduction ops should not use applyFlags");
943 case OperationType::Cmp:
944 case OperationType::Other:
945 break;
946 }
947 }
948
950 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
951 "recipe doesn't have a compare predicate");
952 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
955 }
956
958 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
959 "recipe doesn't have a compare predicate");
960 if (OpType == OperationType::FCmp)
962 else
964 assert(getPredicate() == Pred && "predicate truncated");
965 }
966
970
971 /// Returns true if the recipe has a comparison predicate.
972 bool hasPredicate() const {
973 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
974 }
975
976 /// Returns true if the recipe has fast-math flags.
977 bool hasFastMathFlags() const {
978 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
979 OpType == OperationType::ReductionOp;
980 }
981
983
984 /// Returns true if the recipe has non-negative flag.
985 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
986
987 bool isNonNeg() const {
988 assert(OpType == OperationType::NonNegOp &&
989 "recipe doesn't have a NNEG flag");
990 return NonNegFlags.NonNeg;
991 }
992
993 bool hasNoUnsignedWrap() const {
994 switch (OpType) {
995 case OperationType::OverflowingBinOp:
996 return WrapFlags.HasNUW;
997 case OperationType::Trunc:
998 return TruncFlags.HasNUW;
999 default:
1000 llvm_unreachable("recipe doesn't have a NUW flag");
1001 }
1002 }
1003
1004 bool hasNoSignedWrap() const {
1005 switch (OpType) {
1006 case OperationType::OverflowingBinOp:
1007 return WrapFlags.HasNSW;
1008 case OperationType::Trunc:
1009 return TruncFlags.HasNSW;
1010 default:
1011 llvm_unreachable("recipe doesn't have a NSW flag");
1012 }
1013 }
1014
1015 bool isDisjoint() const {
1016 assert(OpType == OperationType::DisjointOp &&
1017 "recipe cannot have a disjoing flag");
1018 return DisjointFlags.IsDisjoint;
1019 }
1020
1022 assert(OpType == OperationType::ReductionOp &&
1023 "recipe doesn't have reduction flags");
1024 return static_cast<RecurKind>(ReductionFlags.Kind);
1025 }
1026
1027 bool isReductionOrdered() const {
1028 assert(OpType == OperationType::ReductionOp &&
1029 "recipe doesn't have reduction flags");
1030 return ReductionFlags.IsOrdered;
1031 }
1032
1033 bool isReductionInLoop() const {
1034 assert(OpType == OperationType::ReductionOp &&
1035 "recipe doesn't have reduction flags");
1036 return ReductionFlags.IsInLoop;
1037 }
1038
1039private:
1040 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1041 FastMathFlagsTy &getFMFsRef() {
1042 if (OpType == OperationType::FCmp)
1043 return FCmpFlags.FMFs;
1044 if (OpType == OperationType::ReductionOp)
1045 return ReductionFlags.FMFs;
1046 return FMFs;
1047 }
1048 const FastMathFlagsTy &getFMFsRef() const {
1049 if (OpType == OperationType::FCmp)
1050 return FCmpFlags.FMFs;
1051 if (OpType == OperationType::ReductionOp)
1052 return ReductionFlags.FMFs;
1053 return FMFs;
1054 }
1055
1056public:
1057 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1058 /// otherwise. Opcodes not supporting default flags include compares and
1059 /// ComputeReductionResult.
1060 static VPIRFlags getDefaultFlags(unsigned Opcode);
1061
1062#if !defined(NDEBUG)
1063 /// Returns true if the set flags are valid for \p Opcode.
1064 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1065
1066 /// Returns true if \p Opcode has its required flags set.
1067 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1068#endif
1069
1070#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1071 void printFlags(raw_ostream &O) const;
1072#endif
1073};
1075
1076static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1077
1078/// A pure-virtual common base class for recipes defining a single VPValue and
1079/// using IR flags.
1081 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1082 const VPIRFlags &Flags,
1084 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1085
1086 static inline bool classof(const VPRecipeBase *R) {
1087 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1088 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1089 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1090 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1091 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1092 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1093 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1094 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1095 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1096 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1097 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1098 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1099 }
1100
1101 static inline bool classof(const VPUser *U) {
1102 auto *R = dyn_cast<VPRecipeBase>(U);
1103 return R && classof(R);
1104 }
1105
1106 static inline bool classof(const VPValue *V) {
1107 auto *R = V->getDefiningRecipe();
1108 return R && classof(R);
1109 }
1110
1112
1113 static inline bool classof(const VPSingleDefRecipe *R) {
1114 return classof(static_cast<const VPRecipeBase *>(R));
1115 }
1116
1117 void execute(VPTransformState &State) override = 0;
1118
1119 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1121 VPCostContext &Ctx) const;
1122};
1123
1124/// Helper to access the operand that contains the unroll part for this recipe
1125/// after unrolling.
1126template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
1127protected:
1128 /// Return the VPValue operand containing the unroll part or null if there is
1129 /// no such operand.
1130 VPValue *getUnrollPartOperand(const VPUser &U) const;
1131
1132 /// Return the unroll part.
1133 unsigned getUnrollPart(const VPUser &U) const;
1134};
1135
1136/// Helper to manage IR metadata for recipes. It filters out metadata that
1137/// cannot be propagated.
1140
1141public:
1142 VPIRMetadata() = default;
1143
1144 /// Adds metadata that can be preserved from the original instruction
1145 /// \p I.
1147
1148 /// Copy constructor for cloning.
1150
1152
1153 /// Add all metadata to \p I.
1154 void applyMetadata(Instruction &I) const;
1155
1156 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1157 /// already exists, it will be replaced. Otherwise, it will be added.
1158 void setMetadata(unsigned Kind, MDNode *Node) {
1159 auto It =
1160 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1161 return P.first == Kind;
1162 });
1163 if (It != Metadata.end())
1164 It->second = Node;
1165 else
1166 Metadata.emplace_back(Kind, Node);
1167 }
1168
1169 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1170 /// nodes that are common to both.
1171 void intersect(const VPIRMetadata &MD);
1172
1173 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1174 MDNode *getMetadata(unsigned Kind) const {
1175 auto It =
1176 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1177 return It != Metadata.end() ? It->second : nullptr;
1178 }
1179
1180#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1181 /// Print metadata with node IDs.
1182 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1183#endif
1184};
1185
1186/// This is a concrete Recipe that models a single VPlan-level instruction.
1187/// While as any Recipe it may generate a sequence of IR instructions when
1188/// executed, these instructions would always form a single-def expression as
1189/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1190/// opcodes can take an optional mask. Masks may be assigned during
1191/// predication.
1193 public VPIRMetadata {
1194 friend class VPlanSlp;
1195
1196public:
1197 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1198 enum {
1200 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1201 // values of a first-order recurrence.
1205 // Creates a mask where each lane is active (true) whilst the current
1206 // counter (first operand + index) is less than the second operand. i.e.
1207 // mask[i] = icmp ult (op0 + i), op1
1208 // The size of the mask returned is VF * Multiplier (UF, third op).
1212 // Increment the canonical IV separately for each unrolled part.
1214 // Abstract instruction that compares two values and branches. This is
1215 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1218 // Branch with 2 boolean condition operands and 3 successors. If condition
1219 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1220 // successor 1; otherwise branches to successor 2. Expanded after region
1221 // dissolution into: (1) an OR of the two conditions branching to
1222 // middle.split or successor 2, and (2) middle.split branching to successor
1223 // 0 or successor 1 based on condition 0.
1226 /// Given operands of (the same) struct type, creates a struct of fixed-
1227 /// width vectors each containing a struct field of all operands. The
1228 /// number of operands matches the element count of every vector.
1230 /// Creates a fixed-width vector containing all operands. The number of
1231 /// operands matches the vector element count.
1233 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1234 /// abstract VPInstruction whose single defined VPValue represents VF
1235 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1236 /// VPInstructions.
1238 /// Compute the final result of an AnyOf reduction with select(cmp(),x,y),
1239 /// where one of (x,y) is loop invariant, and both x and y are integer type.
1242 // Extracts the last part of its operand. Removed during unrolling.
1244 // Extracts the last lane of its vector operand, per part.
1246 // Extracts the second-to-last lane from its operand or the second-to-last
1247 // part if it is scalar. In the latter case, the recipe will be removed
1248 // during unrolling.
1250 LogicalAnd, // Non-poison propagating logical And.
1251 LogicalOr, // Non-poison propagating logical Or.
1252 // Add an offset in bytes (second operand) to a base pointer (first
1253 // operand). Only generates scalar values (either for the first lane only or
1254 // for all lanes, depending on its uses).
1256 // Add a vector offset in bytes (second operand) to a scalar base pointer
1257 // (first operand).
1259 // Returns a scalar boolean value, which is true if any lane of its
1260 // (boolean) vector operands is true. It produces the reduced value across
1261 // all unrolled iterations. Unrolling will add all copies of its original
1262 // operand as additional operands. AnyOf is poison-safe as all operands
1263 // will be frozen.
1265 // Calculates the first active lane index of the vector predicate operands.
1266 // It produces the lane index across all unrolled iterations. Unrolling will
1267 // add all copies of its original operand as additional operands.
1268 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1269 // result even with operands that are all zeroes.
1271 // Calculates the last active lane index of the vector predicate operands.
1272 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1273 // tail-folding to extract the correct live-out value from the last active
1274 // iteration. It produces the lane index across all unrolled iterations.
1275 // Unrolling will add all copies of its original operand as additional
1276 // operands.
1278 // Returns a reversed vector for the operand.
1280
1281 // The opcodes below are used for VPInstructionWithType.
1282 //
1283 /// Scale the first operand (vector step) by the second operand
1284 /// (scalar-step). Casts both operands to the result type if needed.
1286 /// Start vector for reductions with 3 operands: the original start value,
1287 /// the identity value for the reduction and an integer indicating the
1288 /// scaling factor.
1290 // Creates a step vector starting from 0 to VF with a step of 1.
1292 /// Extracts a single lane (first operand) from a set of vector operands.
1293 /// The lane specifies an index into a vector formed by combining all vector
1294 /// operands (all operands after the first one).
1296 /// Explicit user for the resume phi of the canonical induction in the main
1297 /// VPlan, used by the epilogue vector loop.
1299 /// Extracts the last active lane from a set of vectors. The first operand
1300 /// is the default value if no lanes in the masks are active. Conceptually,
1301 /// this concatenates all data vectors (odd operands), concatenates all
1302 /// masks (even operands -- ignoring the default value), and returns the
1303 /// last active value from the combined data vector using the combined mask.
1305
1306 /// Returns the value for vscale.
1308 /// Compute the exiting value of a wide induction after vectorization, that
1309 /// is the value of the last lane of the induction increment (i.e. its
1310 /// backedge value). Has the wide induction recipe as operand.
1314 };
1315
1316 /// Returns true if this VPInstruction generates scalar values for all lanes.
1317 /// Most VPInstructions generate a single value per part, either vector or
1318 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1319 /// values per all lanes, stemming from an original ingredient. This method
1320 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1321 /// underlying ingredient.
1322 bool doesGeneratePerAllLanes() const;
1323
1324 /// Return the number of operands determined by the opcode of the
1325 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1326 /// cannot be determined directly by the opcode.
1327 unsigned getNumOperandsForOpcode() const;
1328
1329private:
1330 typedef unsigned char OpcodeTy;
1331 OpcodeTy Opcode;
1332
1333 /// An optional name that can be used for the generated IR instruction.
1334 std::string Name;
1335
1336 /// Returns true if we can generate a scalar for the first lane only if
1337 /// needed.
1338 bool canGenerateScalarForFirstLane() const;
1339
1340 /// Utility methods serving execute(): generates a single vector instance of
1341 /// the modeled instruction. \returns the generated value. In some cases an
1342 /// existing value is returned rather than a generated one.
1343 Value *generate(VPTransformState &State);
1344
1345 /// Returns true if the VPInstruction does not need masking.
1346 bool alwaysUnmasked() const {
1347 if (Opcode == VPInstruction::MaskedCond)
1348 return false;
1349
1350 // For now only VPInstructions with underlying values use masks.
1351 // TODO: provide masks to VPInstructions w/o underlying values.
1352 if (!getUnderlyingValue())
1353 return true;
1354
1355 return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
1356 }
1357
1358public:
1359 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1360 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1361 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1362
1363 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1364
1365 VPInstruction *clone() override {
1366 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1367 getDebugLoc(), Name);
1368 if (getUnderlyingValue())
1369 New->setUnderlyingValue(getUnderlyingInstr());
1370 return New;
1371 }
1372
1373 unsigned getOpcode() const { return Opcode; }
1374
1375 /// Generate the instruction.
1376 /// TODO: We currently execute only per-part unless a specific instance is
1377 /// provided.
1378 void execute(VPTransformState &State) override;
1379
1380 /// Return the cost of this VPInstruction.
1381 InstructionCost computeCost(ElementCount VF,
1382 VPCostContext &Ctx) const override;
1383
1384#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1385 /// Print the VPInstruction to dbgs() (for debugging).
1386 LLVM_DUMP_METHOD void dump() const;
1387#endif
1388
1389 bool hasResult() const {
1390 // CallInst may or may not have a result, depending on the called function.
1391 // Conservatively return calls have results for now.
1392 switch (getOpcode()) {
1393 case Instruction::Ret:
1394 case Instruction::UncondBr:
1395 case Instruction::CondBr:
1396 case Instruction::Store:
1397 case Instruction::Switch:
1398 case Instruction::IndirectBr:
1399 case Instruction::Resume:
1400 case Instruction::CatchRet:
1401 case Instruction::Unreachable:
1402 case Instruction::Fence:
1403 case Instruction::AtomicRMW:
1407 return false;
1408 default:
1409 return true;
1410 }
1411 }
1412
1413 /// Returns true if the VPInstruction has a mask operand.
1414 bool isMasked() const {
1415 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1416 // VPInstructions without a fixed number of operands cannot be masked.
1417 if (NumOpsForOpcode == -1u)
1418 return false;
1419 return NumOpsForOpcode + 1 == getNumOperands();
1420 }
1421
1422 /// Returns the number of operands, excluding the mask if the VPInstruction is
1423 /// masked.
1424 unsigned getNumOperandsWithoutMask() const {
1425 return getNumOperands() - isMasked();
1426 }
1427
1428 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1429 void addMask(VPValue *Mask) {
1430 assert(!isMasked() && "recipe is already masked");
1431 if (alwaysUnmasked())
1432 return;
1433 addOperand(Mask);
1434 }
1435
1436 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1437 /// VPInstructions.
1438 VPValue *getMask() const {
1439 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1440 }
1441
1442 /// Returns an iterator range over the operands excluding the mask operand
1443 /// if present.
1450
1451 /// Returns true if the underlying opcode may read from or write to memory.
1452 bool opcodeMayReadOrWriteFromMemory() const;
1453
1454 /// Returns true if the recipe only uses the first lane of operand \p Op.
1455 bool usesFirstLaneOnly(const VPValue *Op) const override;
1456
1457 /// Returns true if the recipe only uses the first part of operand \p Op.
1458 bool usesFirstPartOnly(const VPValue *Op) const override;
1459
1460 /// Returns true if this VPInstruction produces a scalar value from a vector,
1461 /// e.g. by performing a reduction or extracting a lane.
1462 bool isVectorToScalar() const;
1463
1464 /// Returns true if this VPInstruction's operands are single scalars and the
1465 /// result is also a single scalar.
1466 bool isSingleScalar() const;
1467
1468 /// Returns the symbolic name assigned to the VPInstruction.
1469 StringRef getName() const { return Name; }
1470
1471 /// Set the symbolic name for the VPInstruction.
1472 void setName(StringRef NewName) { Name = NewName.str(); }
1473
1474protected:
1475#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1476 /// Print the VPInstruction to \p O.
1477 void printRecipe(raw_ostream &O, const Twine &Indent,
1478 VPSlotTracker &SlotTracker) const override;
1479#endif
1480};
1481
1482/// A specialization of VPInstruction augmenting it with a dedicated result
1483/// type, to be used when the opcode and operands of the VPInstruction don't
1484/// directly determine the result type. Note that there is no separate recipe ID
1485/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1486/// distinguished purely by the opcode.
1488 /// Scalar result type produced by the recipe.
1489 Type *ResultTy;
1490
1491public:
1493 Type *ResultTy, const VPIRFlags &Flags = {},
1494 const VPIRMetadata &Metadata = {},
1496 const Twine &Name = "")
1497 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1498 ResultTy(ResultTy) {}
1499
1500 static inline bool classof(const VPRecipeBase *R) {
1501 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1502 // type information.
1503 if (R->isScalarCast())
1504 return true;
1505 auto *VPI = dyn_cast<VPInstruction>(R);
1506 if (!VPI)
1507 return false;
1508 switch (VPI->getOpcode()) {
1512 case Instruction::Load:
1513 return true;
1514 default:
1515 return false;
1516 }
1517 }
1518
1519 static inline bool classof(const VPUser *R) {
1521 }
1522
1523 VPInstruction *clone() override {
1524 auto *New =
1526 *this, *this, getDebugLoc(), getName());
1527 New->setUnderlyingValue(getUnderlyingValue());
1528 return New;
1529 }
1530
1531 void execute(VPTransformState &State) override;
1532
1533 /// Return the cost of this VPInstruction.
1535 VPCostContext &Ctx) const override {
1536 // TODO: Compute accurate cost after retiring the legacy cost model.
1537 return 0;
1538 }
1539
1540 Type *getResultType() const { return ResultTy; }
1541
1542protected:
1543#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1544 /// Print the recipe.
1545 void printRecipe(raw_ostream &O, const Twine &Indent,
1546 VPSlotTracker &SlotTracker) const override;
1547#endif
1548};
1549
1550/// Helper type to provide functions to access incoming values and blocks for
1551/// phi-like recipes.
1553protected:
1554 /// Return a VPRecipeBase* to the current object.
1555 virtual const VPRecipeBase *getAsRecipe() const = 0;
1556
1557public:
1558 virtual ~VPPhiAccessors() = default;
1559
1560 /// Returns the incoming VPValue with index \p Idx.
1561 VPValue *getIncomingValue(unsigned Idx) const {
1562 return getAsRecipe()->getOperand(Idx);
1563 }
1564
1565 /// Returns the incoming block with index \p Idx.
1566 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1567
1568 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1569 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1570
1571 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1572 /// block.
1573 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1574
1575 /// Returns the number of incoming values, also number of incoming blocks.
1576 virtual unsigned getNumIncoming() const {
1577 return getAsRecipe()->getNumOperands();
1578 }
1579
1580 /// Returns an iterator range over the incoming values.
1582 return make_range(getAsRecipe()->op_begin(),
1583 getAsRecipe()->op_begin() + getNumIncoming());
1584 }
1585
1587 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1588
1589 /// Returns an iterator range over the incoming blocks.
1591 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1592 return getIncomingBlock(Idx);
1593 };
1594 return map_range(index_range(0, getNumIncoming()), GetBlock);
1595 }
1596
1597 /// Returns an iterator range over pairs of incoming values and corresponding
1598 /// incoming blocks.
1604
1605 /// Removes the incoming value for \p IncomingBlock, which must be a
1606 /// predecessor.
1607 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1608
1609#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1610 /// Print the recipe.
1612#endif
1613};
1614
1617 const Twine &Name = "")
1618 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1619
1620 static inline bool classof(const VPUser *U) {
1621 auto *VPI = dyn_cast<VPInstruction>(U);
1622 return VPI && VPI->getOpcode() == Instruction::PHI;
1623 }
1624
1625 static inline bool classof(const VPValue *V) {
1626 auto *VPI = dyn_cast<VPInstruction>(V);
1627 return VPI && VPI->getOpcode() == Instruction::PHI;
1628 }
1629
1630 static inline bool classof(const VPSingleDefRecipe *SDR) {
1631 auto *VPI = dyn_cast<VPInstruction>(SDR);
1632 return VPI && VPI->getOpcode() == Instruction::PHI;
1633 }
1634
1635 VPPhi *clone() override {
1636 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1637 PhiR->setUnderlyingValue(getUnderlyingValue());
1638 return PhiR;
1639 }
1640
1641 void execute(VPTransformState &State) override;
1642
1643protected:
1644#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1645 /// Print the recipe.
1646 void printRecipe(raw_ostream &O, const Twine &Indent,
1647 VPSlotTracker &SlotTracker) const override;
1648#endif
1649
1650 const VPRecipeBase *getAsRecipe() const override { return this; }
1651};
1652
1653 /// A recipe to wrap an original IR instruction not to be modified during
1654/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1655 /// Except for PHIs, VPIRInstructions cannot have any operands.
1657 Instruction &I;
1658
1659protected:
1660 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1661 /// subclasses may need to be created, e.g. VPIRPhi.
1663 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1664
1665public:
1666 ~VPIRInstruction() override = default;
1667
1668 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1669 /// VPIRInstruction.
1671
1672 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1673
1675 auto *R = create(I);
1676 for (auto *Op : operands())
1677 R->addOperand(Op);
1678 return R;
1679 }
1680
1681 void execute(VPTransformState &State) override;
1682
1683 /// Return the cost of this VPIRInstruction.
1685 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1686
1687 Instruction &getInstruction() const { return I; }
1688
1689 bool usesScalars(const VPValue *Op) const override {
1691 "Op must be an operand of the recipe");
1692 return true;
1693 }
1694
1695 bool usesFirstPartOnly(const VPValue *Op) const override {
1697 "Op must be an operand of the recipe");
1698 return true;
1699 }
1700
1701 bool usesFirstLaneOnly(const VPValue *Op) const override {
1703 "Op must be an operand of the recipe");
1704 return true;
1705 }
1706
1707protected:
1708#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1709 /// Print the recipe.
1710 void printRecipe(raw_ostream &O, const Twine &Indent,
1711 VPSlotTracker &SlotTracker) const override;
1712#endif
1713};
1714
1715 /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of
1716/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1717/// allowed, and it is used to add a new incoming value for the single
1718/// predecessor VPBB.
1720 public VPPhiAccessors {
1722
1723 static inline bool classof(const VPRecipeBase *U) {
1724 auto *R = dyn_cast<VPIRInstruction>(U);
1725 return R && isa<PHINode>(R->getInstruction());
1726 }
1727
1728 static inline bool classof(const VPUser *U) {
1729 auto *R = dyn_cast<VPRecipeBase>(U);
1730 return R && classof(R);
1731 }
1732
1734
1735 void execute(VPTransformState &State) override;
1736
1737protected:
1738#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1739 /// Print the recipe.
1740 void printRecipe(raw_ostream &O, const Twine &Indent,
1741 VPSlotTracker &SlotTracker) const override;
1742#endif
1743
1744 const VPRecipeBase *getAsRecipe() const override { return this; }
1745};
1746
1747/// VPWidenRecipe is a recipe for producing a widened instruction using the
1748/// opcode and operands of the recipe. This recipe covers most of the
1749/// traditional vectorization cases where each recipe transforms into a
1750/// vectorized version of itself.
1752 public VPIRMetadata {
1753 unsigned Opcode;
1754
1755public:
1757 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1758 DebugLoc DL = {})
1759 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1760 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1761 setUnderlyingValue(&I);
1762 }
1763
1764 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1765 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1766 DebugLoc DL = {})
1767 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1768 VPIRMetadata(Metadata), Opcode(Opcode) {}
1769
1770 ~VPWidenRecipe() override = default;
1771
1772 VPWidenRecipe *clone() override {
1773 if (auto *UV = getUnderlyingValue())
1774 return new VPWidenRecipe(*cast<Instruction>(UV), operands(), *this, *this,
1775 getDebugLoc());
1776 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1777 }
1778
1779 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1780
1781 /// Produce a widened instruction using the opcode and operands of the recipe,
1782 /// processing State.VF elements.
1783 void execute(VPTransformState &State) override;
1784
1785 /// Return the cost of this VPWidenRecipe.
1786 InstructionCost computeCost(ElementCount VF,
1787 VPCostContext &Ctx) const override;
1788
1789 unsigned getOpcode() const { return Opcode; }
1790
1791protected:
1792#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1793 /// Print the recipe.
1794 void printRecipe(raw_ostream &O, const Twine &Indent,
1795 VPSlotTracker &SlotTracker) const override;
1796#endif
1797
1798 /// Returns true if the recipe only uses the first lane of operand \p Op.
1799 bool usesFirstLaneOnly(const VPValue *Op) const override {
1801 "Op must be an operand of the recipe");
1802 return Opcode == Instruction::Select && Op == getOperand(0) &&
1803 Op->isDefinedOutsideLoopRegions();
1804 }
1805};
1806
1807/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1809 /// Cast instruction opcode.
1810 Instruction::CastOps Opcode;
1811
1812 /// Result type for the cast.
1813 Type *ResultTy;
1814
1815public:
1817 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1818 const VPIRMetadata &Metadata = {},
1820 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1821 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1822 assert(flagsValidForOpcode(Opcode) &&
1823 "Set flags not supported for the provided opcode");
1825 "Opcode requires specific flags to be set");
1827 }
1828
1829 ~VPWidenCastRecipe() override = default;
1830
1832 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1834 *this, *this, getDebugLoc());
1835 }
1836
1837 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1838
1839 /// Produce widened copies of the cast.
1840 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1841
1842 /// Return the cost of this VPWidenCastRecipe.
1844 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1845
1846 Instruction::CastOps getOpcode() const { return Opcode; }
1847
1848 /// Returns the result type of the cast.
1849 Type *getResultType() const { return ResultTy; }
1850
1851protected:
1852#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1853 /// Print the recipe.
1854 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1855 VPSlotTracker &SlotTracker) const override;
1856#endif
1857};
1858
1859/// A recipe for widening vector intrinsics.
1861 /// ID of the vector intrinsic to widen.
1862 Intrinsic::ID VectorIntrinsicID;
1863
1864 /// Scalar return type of the intrinsic.
1865 Type *ResultTy;
1866
1867 /// True if the intrinsic may read from memory.
1868 bool MayReadFromMemory;
1869
1870 /// True if the intrinsic may write to memory.
1871 bool MayWriteToMemory;
1872
1873 /// True if the intrinsic may have side-effects.
1874 bool MayHaveSideEffects;
1875
1876public:
1878 ArrayRef<VPValue *> CallArguments, Type *Ty,
1879 const VPIRFlags &Flags = {},
1880 const VPIRMetadata &MD = {},
1882 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1883 Flags, DL),
1884 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1885 MayReadFromMemory(CI.mayReadFromMemory()),
1886 MayWriteToMemory(CI.mayWriteToMemory()),
1887 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1888 setUnderlyingValue(&CI);
1889 }
1890
1892 ArrayRef<VPValue *> CallArguments, Type *Ty,
1893 const VPIRFlags &Flags = {},
1894 const VPIRMetadata &Metadata = {},
1896 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1897 Flags, DL),
1898 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1899 ResultTy(Ty) {
1900 LLVMContext &Ctx = Ty->getContext();
1901 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1902 MemoryEffects ME = Attrs.getMemoryEffects();
1903 MayReadFromMemory = !ME.onlyWritesMemory();
1904 MayWriteToMemory = !ME.onlyReadsMemory();
1905 MayHaveSideEffects = MayWriteToMemory ||
1906 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1907 !Attrs.hasAttribute(Attribute::WillReturn);
1908 }
1909
1910 ~VPWidenIntrinsicRecipe() override = default;
1911
1913 if (Value *CI = getUnderlyingValue())
1914 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1915 operands(), ResultTy, *this, *this,
1916 getDebugLoc());
1917 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1918 *this, *this, getDebugLoc());
1919 }
1920
1921 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1922
1923 /// Produce a widened version of the vector intrinsic.
1924 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1925
1926 /// Return the cost of this vector intrinsic.
1928 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1929
1930 /// Return the ID of the intrinsic.
1931 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1932
1933 /// Return the scalar return type of the intrinsic.
1934 Type *getResultType() const { return ResultTy; }
1935
1936 /// Return the name of the intrinsic as a string.
1938
1939 /// Returns true if the intrinsic may read from memory.
1940 bool mayReadFromMemory() const { return MayReadFromMemory; }
1941
1942 /// Returns true if the intrinsic may write to memory.
1943 bool mayWriteToMemory() const { return MayWriteToMemory; }
1944
1945 /// Returns true if the intrinsic may have side-effects.
1946 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1947
1948 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1949
1950protected:
1951#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1952 /// Print the recipe.
1953 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1954 VPSlotTracker &SlotTracker) const override;
1955#endif
1956};
1957
1958/// A recipe for widening Call instructions using library calls.
1960 public VPIRMetadata {
1961 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1962 /// between a given VF and the chosen vectorized variant, so there will be a
1963 /// different VPlan for each VF with a valid variant.
1964 Function *Variant;
1965
1966public:
1968 ArrayRef<VPValue *> CallArguments,
1969 const VPIRFlags &Flags = {},
1970 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1971 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
1972 DL),
1973 VPIRMetadata(Metadata), Variant(Variant) {
1974 setUnderlyingValue(UV);
1975 assert(
1976 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1977 "last operand must be the called function");
1978 }
1979
1980 ~VPWidenCallRecipe() override = default;
1981
1983 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
1984 *this, *this, getDebugLoc());
1985 }
1986
1987 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
1988
1989 /// Produce a widened version of the call instruction.
1990 void execute(VPTransformState &State) override;
1991
1992 /// Return the cost of this VPWidenCallRecipe.
1993 InstructionCost computeCost(ElementCount VF,
1994 VPCostContext &Ctx) const override;
1995
1999
2002
2003protected:
2004#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2005 /// Print the recipe.
2006 void printRecipe(raw_ostream &O, const Twine &Indent,
2007 VPSlotTracker &SlotTracker) const override;
2008#endif
2009};
2010
2011/// A recipe representing a sequence of load -> update -> store as part of
2012/// a histogram operation. This means there may be aliasing between vector
2013/// lanes, which is handled by the llvm.experimental.vector.histogram family
2014/// of intrinsics. The only update operations currently supported are
2015/// 'add' and 'sub' where the other term is loop-invariant.
2017 /// Opcode of the update operation, currently either add or sub.
2018 unsigned Opcode;
2019
2020public:
2021 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2023 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2024 Opcode(Opcode) {}
2025
2026 ~VPHistogramRecipe() override = default;
2027
2029 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2030 }
2031
2032 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2033
2034 /// Produce a vectorized histogram operation.
2035 void execute(VPTransformState &State) override;
2036
2037 /// Return the cost of this VPHistogramRecipe.
2039 VPCostContext &Ctx) const override;
2040
2041 unsigned getOpcode() const { return Opcode; }
2042
2043 /// Return the mask operand if one was provided, or a null pointer if all
2044 /// lanes should be executed unconditionally.
2045 VPValue *getMask() const {
2046 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2047 }
2048
2049protected:
2050#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2051 /// Print the recipe.
2052 void printRecipe(raw_ostream &O, const Twine &Indent,
2053 VPSlotTracker &SlotTracker) const override;
2054#endif
2055};
2056
2057/// A recipe for handling GEP instructions.
2059 Type *SourceElementTy;
2060
2061 bool isPointerLoopInvariant() const {
2062 return getOperand(0)->isDefinedOutsideLoopRegions();
2063 }
2064
2065 bool isIndexLoopInvariant(unsigned I) const {
2066 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2067 }
2068
2069public:
2071 const VPIRFlags &Flags = {},
2073 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2074 SourceElementTy(GEP->getSourceElementType()) {
2075 setUnderlyingValue(GEP);
2077 (void)Metadata;
2079 assert(Metadata.empty() && "unexpected metadata on GEP");
2080 }
2081
2082 ~VPWidenGEPRecipe() override = default;
2083
2086 operands(), *this, getDebugLoc());
2087 }
2088
2089 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2090
2091 /// This recipe generates a GEP instruction.
2092 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2093
2094 /// Generate the gep nodes.
2095 void execute(VPTransformState &State) override;
2096
2097 Type *getSourceElementType() const { return SourceElementTy; }
2098
2099 /// Return the cost of this VPWidenGEPRecipe.
2101 VPCostContext &Ctx) const override {
2102 // TODO: Compute accurate cost after retiring the legacy cost model.
2103 return 0;
2104 }
2105
2106 /// Returns true if the recipe only uses the first lane of operand \p Op.
2107 bool usesFirstLaneOnly(const VPValue *Op) const override;
2108
2109protected:
2110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2111 /// Print the recipe.
2112 void printRecipe(raw_ostream &O, const Twine &Indent,
2113 VPSlotTracker &SlotTracker) const override;
2114#endif
2115};
2116
2117/// A recipe to compute a pointer to the last element of each part of a widened
2118/// memory access for widened memory accesses of SourceElementTy. Used for
2119/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2120/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2121/// unroller otherwise.
2123 Type *SourceElementTy;
2124
2125 /// The constant stride of the pointer computed by this recipe, expressed in
2126 /// units of SourceElementTy.
2127 int64_t Stride;
2128
2129public:
2130 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2131 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2132 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2133 GEPFlags, DL),
2134 SourceElementTy(SourceElementTy), Stride(Stride) {
2135 assert(Stride < 0 && "Stride must be negative");
2136 }
2137
2138 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2139
2140 Type *getSourceElementType() const { return SourceElementTy; }
2141 int64_t getStride() const { return Stride; }
2142 VPValue *getPointer() const { return getOperand(0); }
2143 VPValue *getVFValue() const { return getOperand(1); }
2145 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2146 }
2147
2148 /// Adds the offset operand to the recipe.
2149 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2150 void materializeOffset(unsigned Part = 0);
2151
2152 void execute(VPTransformState &State) override;
2153
2154 bool usesFirstLaneOnly(const VPValue *Op) const override {
2156 "Op must be an operand of the recipe");
2157 return true;
2158 }
2159
2160 /// Return the cost of this VPVectorEndPointerRecipe.
2162 VPCostContext &Ctx) const override {
2163 // TODO: Compute accurate cost after retiring the legacy cost model.
2164 return 0;
2165 }
2166
2167 /// Returns true if the recipe only uses the first part of operand \p Op.
2168 bool usesFirstPartOnly(const VPValue *Op) const override {
2170 "Op must be an operand of the recipe");
2171 assert(getNumOperands() <= 2 && "must have at most two operands");
2172 return true;
2173 }
2174
2176 auto *VEPR = new VPVectorEndPointerRecipe(
2179 if (auto *Offset = getOffset())
2180 VEPR->addOperand(Offset);
2181 return VEPR;
2182 }
2183
2184protected:
2185#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2186 /// Print the recipe.
2187 void printRecipe(raw_ostream &O, const Twine &Indent,
2188 VPSlotTracker &SlotTracker) const override;
2189#endif
2190};
2191
2192/// A recipe to compute the pointers for widened memory accesses of \p
2193/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2194/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2196 Type *SourceElementTy;
2197
2198public:
2199 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2200 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2201 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2202 SourceElementTy(SourceElementTy) {}
2203
2204 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2205
2207 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2208 }
2209
2210 void execute(VPTransformState &State) override;
2211
2212 Type *getSourceElementType() const { return SourceElementTy; }
2213
2214 bool usesFirstLaneOnly(const VPValue *Op) const override {
2216 "Op must be an operand of the recipe");
2217 return true;
2218 }
2219
2220 /// Returns true if the recipe only uses the first part of operand \p Op.
2221 bool usesFirstPartOnly(const VPValue *Op) const override {
2223 "Op must be an operand of the recipe");
2224 assert(getNumOperands() <= 2 && "must have at most two operands");
2225 return true;
2226 }
2227
2229 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2231 if (auto *Off = getOffset())
2232 Clone->addOperand(Off);
2233 return Clone;
2234 }
2235
2236 /// Return the cost of this VPVectorPointerRecipe.
2238 VPCostContext &Ctx) const override {
2239 // TODO: Compute accurate cost after retiring the legacy cost model.
2240 return 0;
2241 }
2242
2243protected:
2244#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2245 /// Print the recipe.
2246 void printRecipe(raw_ostream &O, const Twine &Indent,
2247 VPSlotTracker &SlotTracker) const override;
2248#endif
2249};
2250
2251/// A pure virtual base class for all recipes modeling header phis, including
2252/// phis for first order recurrences, pointer inductions and reductions. The
2253/// start value is the first operand of the recipe and the incoming value from
2254/// the backedge is the second operand.
2255///
2256/// Inductions are modeled using the following sub-classes:
2257/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2258/// starting at a specified value (zero for the main vector loop, the resume
2259/// value for the epilogue vector loop) and stepping by 1. The induction
2260/// controls exiting of the vector loop by comparing against the vector trip
2261/// count. Produces a single scalar PHI for the induction value per
2262/// iteration.
2263/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2264/// floating point inductions with arbitrary start and step values. Produces
2265/// a vector PHI per-part.
2266/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2267/// pointer induction. Produces either a vector PHI per-part or scalar values
2268/// per-lane based on the canonical induction.
2269/// * VPFirstOrderRecurrencePHIRecipe
2270/// * VPReductionPHIRecipe
2271/// * VPActiveLaneMaskPHIRecipe
2272/// * VPEVLBasedIVPHIRecipe
2274 public VPPhiAccessors {
2275protected:
2276 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2277 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2278 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2279
2280 const VPRecipeBase *getAsRecipe() const override { return this; }
2281
2282public:
2283 ~VPHeaderPHIRecipe() override = default;
2284
2285 /// Method to support type inquiry through isa, cast, and dyn_cast.
2286 static inline bool classof(const VPRecipeBase *R) {
2287 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2288 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2289 }
2290 static inline bool classof(const VPValue *V) {
2291 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2292 }
2293 static inline bool classof(const VPSingleDefRecipe *R) {
2294 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2295 }
2296
2297 /// Generate the phi nodes.
2298 void execute(VPTransformState &State) override = 0;
2299
2300 /// Return the cost of this header phi recipe.
2302 VPCostContext &Ctx) const override;
2303
2304 /// Returns the start value of the phi, if one is set.
2306 return getNumOperands() == 0 ? nullptr : getOperand(0);
2307 }
2309 return getNumOperands() == 0 ? nullptr : getOperand(0);
2310 }
2311
2312 /// Update the start value of the recipe.
2314
2315 /// Returns the incoming value from the loop backedge.
2317 return getOperand(1);
2318 }
2319
2320 /// Update the incoming value from the loop backedge.
2322
2323 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2324 /// to be a recipe.
2326 return *getBackedgeValue()->getDefiningRecipe();
2327 }
2328
2329protected:
2330#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2331 /// Print the recipe.
2332 void printRecipe(raw_ostream &O, const Twine &Indent,
2333 VPSlotTracker &SlotTracker) const override = 0;
2334#endif
2335};
2336
2337/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2338/// VPWidenPointerInductionRecipe), providing shared functionality, including
2339/// retrieving the step value, induction descriptor and original phi node.
2341 const InductionDescriptor &IndDesc;
2342
2343public:
2344 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2345 VPValue *Step, const InductionDescriptor &IndDesc,
2346 DebugLoc DL)
2347 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2348 addOperand(Step);
2349 }
2350
2351 static inline bool classof(const VPRecipeBase *R) {
2352 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2353 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2354 }
2355
2356 static inline bool classof(const VPValue *V) {
2357 auto *R = V->getDefiningRecipe();
2358 return R && classof(R);
2359 }
2360
2361 static inline bool classof(const VPSingleDefRecipe *R) {
2362 return classof(static_cast<const VPRecipeBase *>(R));
2363 }
2364
2365 void execute(VPTransformState &State) override = 0;
2366
2367 /// Returns the start value of the induction.
2369
2370 /// Returns the step value of the induction.
2372 const VPValue *getStepValue() const { return getOperand(1); }
2373
2374 /// Update the step value of the recipe.
2375 void setStepValue(VPValue *V) { setOperand(1, V); }
2376
2378 const VPValue *getVFValue() const { return getOperand(2); }
2379
2380 /// Returns the number of incoming values, also number of incoming blocks.
2381 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2382 /// incoming value, its start value.
2383 unsigned getNumIncoming() const override { return 1; }
2384
2385 /// Returns the underlying PHINode if one exists, or null otherwise.
2389
2390 /// Returns the induction descriptor for the recipe.
2391 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2392
2394 // TODO: All operands of base recipe must exist and be at same index in
2395 // derived recipe.
2397 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2398 }
2399
2401 // TODO: All operands of base recipe must exist and be at same index in
2402 // derived recipe.
2404 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2405 }
2406
2407 /// Returns true if the recipe only uses the first lane of operand \p Op.
2408 bool usesFirstLaneOnly(const VPValue *Op) const override {
2410 "Op must be an operand of the recipe");
2411 // The recipe creates its own wide start value, so it only requests the
2412 // first lane of the operand.
2413 // TODO: Remove once creating the start value is modeled separately.
2414 return Op == getStartValue() || Op == getStepValue();
2415 }
2416};
2417
2418/// A recipe for handling phi nodes of integer and floating-point inductions,
2419/// producing their vector values. This is an abstract recipe and must be
2420/// converted to concrete recipes before executing.
2422 public VPIRFlags {
2423 TruncInst *Trunc;
2424
2425 // If this recipe is unrolled it will have 2 additional operands.
2426 bool isUnrolled() const { return getNumOperands() == 5; }
2427
2428public:
2430 VPValue *VF, const InductionDescriptor &IndDesc,
2431 const VPIRFlags &Flags, DebugLoc DL)
2432 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2433 Start, Step, IndDesc, DL),
2434 VPIRFlags(Flags), Trunc(nullptr) {
2435 addOperand(VF);
2436 }
2437
2439 VPValue *VF, const InductionDescriptor &IndDesc,
2440 TruncInst *Trunc, const VPIRFlags &Flags,
2441 DebugLoc DL)
2442 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2443 Start, Step, IndDesc, DL),
2444 VPIRFlags(Flags), Trunc(Trunc) {
2445 addOperand(VF);
2447 (void)Metadata;
2448 if (Trunc)
2450 assert(Metadata.empty() && "unexpected metadata on Trunc");
2451 }
2452
2454
2460
2461 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2462
2463 void execute(VPTransformState &State) override {
2464 llvm_unreachable("cannot execute this recipe, should be expanded via "
2465 "expandVPWidenIntOrFpInductionRecipe");
2466 }
2467
2468 /// Returns the start value of the induction.
2470
2471 /// If the recipe has been unrolled, return the VPValue for the induction
2472 /// increment, otherwise return null.
2474 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2475 }
2476
2477 /// Returns the number of incoming values, also number of incoming blocks.
2478 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2479 /// incoming value, its start value.
2480 unsigned getNumIncoming() const override { return 1; }
2481
2482 /// Returns the first defined value as TruncInst, if it is one or nullptr
2483 /// otherwise.
2484 TruncInst *getTruncInst() { return Trunc; }
2485 const TruncInst *getTruncInst() const { return Trunc; }
2486
2487 /// Returns true if the induction is canonical, i.e. starting at 0 and
2488 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2489 /// same type as the canonical induction.
2490 bool isCanonical() const;
2491
2492 /// Returns the scalar type of the induction.
2494 return Trunc ? Trunc->getType() : getStartValue()->getType();
2495 }
2496
2497 /// Returns the VPValue representing the value of this induction at
2498 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2499 /// take place.
2501 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2502 }
2503
2504protected:
2505#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2506 /// Print the recipe.
2507 void printRecipe(raw_ostream &O, const Twine &Indent,
2508 VPSlotTracker &SlotTracker) const override;
2509#endif
2510};
2511
2513public:
2514 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2515 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2516 /// VF*UF.
2518 VPValue *NumUnrolledElems,
2519 const InductionDescriptor &IndDesc, DebugLoc DL)
2520 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2521 Start, Step, IndDesc, DL) {
2522 addOperand(NumUnrolledElems);
2523 }
2524
2526
2532
2533 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2534
2535 /// Generate vector values for the pointer induction.
2536 void execute(VPTransformState &State) override {
2537 llvm_unreachable("cannot execute this recipe, should be expanded via "
2538 "expandVPWidenPointerInduction");
2539 };
2540
2541 /// Returns true if only scalar values will be generated.
2542 bool onlyScalarsGenerated(bool IsScalable);
2543
2544protected:
2545#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2546 /// Print the recipe.
2547 void printRecipe(raw_ostream &O, const Twine &Indent,
2548 VPSlotTracker &SlotTracker) const override;
2549#endif
2550};
2551
2552/// A recipe for widened phis. Incoming values are operands of the recipe and
2553/// their operand index corresponds to the incoming predecessor block. If the
2554/// recipe is placed in an entry block to a (non-replicate) region, it must have
2555/// exactly 2 incoming values, the first from the predecessor of the region and
2556/// the second from the exiting block of the region.
2558 public VPPhiAccessors {
2559 /// Name to use for the generated IR instruction for the widened phi.
2560 std::string Name;
2561
2562public:
2563 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2564 /// debug location \p DL.
2565 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2566 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2567 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, {}, Phi, DL),
2568 Name(Name.str()) {
2569 if (Start)
2570 addOperand(Start);
2571 }
2572
2574 auto *C =
2576 getOperand(0), getDebugLoc(), Name);
2578 C->addOperand(Op);
2579 return C;
2580 }
2581
2582 ~VPWidenPHIRecipe() override = default;
2583
2584 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2585
2586 /// Generate the phi/select nodes.
2587 void execute(VPTransformState &State) override;
2588
2589 /// Return the cost of this VPWidenPHIRecipe.
2591 VPCostContext &Ctx) const override;
2592
2593protected:
2594#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2595 /// Print the recipe.
2596 void printRecipe(raw_ostream &O, const Twine &Indent,
2597 VPSlotTracker &SlotTracker) const override;
2598#endif
2599
2600 const VPRecipeBase *getAsRecipe() const override { return this; }
2601};
2602
2603/// A recipe for handling first-order recurrence phis. The start value is the
2604/// first operand of the recipe and the incoming value from the backedge is the
2605/// second operand.
2608 VPValue &BackedgeValue)
2609 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2610 &Start) {
2611 addOperand(&BackedgeValue);
2612 }
2613
2614 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2615
2620
2621 void execute(VPTransformState &State) override;
2622
2623 /// Return the cost of this first-order recurrence phi recipe.
2625 VPCostContext &Ctx) const override;
2626
2627 /// Returns true if the recipe only uses the first lane of operand \p Op.
2628 bool usesFirstLaneOnly(const VPValue *Op) const override {
2630 "Op must be an operand of the recipe");
2631 return Op == getStartValue();
2632 }
2633
2634protected:
2635#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2636 /// Print the recipe.
2637 void printRecipe(raw_ostream &O, const Twine &Indent,
2638 VPSlotTracker &SlotTracker) const override;
2639#endif
2640};
2641
2642/// Possible variants of a reduction.
2643
2644/// This reduction is ordered and in-loop.
2645struct RdxOrdered {};
2646/// This reduction is in-loop.
2647struct RdxInLoop {};
2648/// This reduction is unordered with the partial result scaled down by some
2649/// factor.
2652};
2653using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2654
2655inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2656 unsigned ScaleFactor) {
2657 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2658 if (Ordered)
2659 return RdxOrdered{};
2660 if (InLoop)
2661 return RdxInLoop{};
2662 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2663}
2664
2665/// A recipe for handling reduction phis. The start value is the first operand
2666/// of the recipe and the incoming value from the backedge is the second
2667/// operand.
2669 /// The recurrence kind of the reduction.
2670 const RecurKind Kind;
2671
2672 ReductionStyle Style;
2673
2674 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2675 /// patterns for argmin/argmax).
2676 /// TODO: Also support cases where the phi itself has a single use, but its
2677 /// compare has multiple uses.
2678 bool HasUsesOutsideReductionChain;
2679
2680public:
2681 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2683 VPValue &BackedgeValue, ReductionStyle Style,
2684 const VPIRFlags &Flags,
2685 bool HasUsesOutsideReductionChain = false)
2686 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2687 VPIRFlags(Flags), Kind(Kind), Style(Style),
2688 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2689 addOperand(&BackedgeValue);
2690 }
2691
2692 ~VPReductionPHIRecipe() override = default;
2693
2695 return new VPReductionPHIRecipe(
2697 *getOperand(0), *getBackedgeValue(), Style, *this,
2698 HasUsesOutsideReductionChain);
2699 }
2700
2701 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2702
2703 /// Generate the phi/select nodes.
2704 void execute(VPTransformState &State) override;
2705
2706 /// Get the factor that the VF of this recipe's output should be scaled by, or
2707 /// 1 if it isn't scaled.
2708 unsigned getVFScaleFactor() const {
2709 auto *Partial = std::get_if<RdxUnordered>(&Style);
2710 return Partial ? Partial->VFScaleFactor : 1;
2711 }
2712
2713 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2714 /// > 1.
2715 void setVFScaleFactor(unsigned ScaleFactor) {
2716 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2717 Style = RdxUnordered{ScaleFactor};
2718 }
2719
2720 /// Returns the number of incoming values, also number of incoming blocks.
2721 /// A reduction phi has two incoming values: the start value and the value
2722 /// from the loop backedge.
2723 unsigned getNumIncoming() const override { return 2; }
2724
2725 /// Returns the recurrence kind of the reduction.
2726 RecurKind getRecurrenceKind() const { return Kind; }
2727
2728 /// Returns true, if the phi is part of an ordered reduction.
2729 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2730
2731 /// Returns true if the phi is part of an in-loop reduction.
2732 bool isInLoop() const {
2733 return std::holds_alternative<RdxInLoop>(Style) ||
2734 std::holds_alternative<RdxOrdered>(Style);
2735 }
2736
2737 /// Returns true if the reduction outputs a vector with a scaled down VF.
2738 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2739
2740 /// Returns true, if the phi is part of a multi-use reduction.
2742 return HasUsesOutsideReductionChain;
2743 }
2744
2745 /// Returns true if the recipe only uses the first lane of operand \p Op.
2746 bool usesFirstLaneOnly(const VPValue *Op) const override {
2748 "Op must be an operand of the recipe");
2749 return isOrdered() || isInLoop();
2750 }
2751
2752protected:
2753#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2754 /// Print the recipe.
2755 void printRecipe(raw_ostream &O, const Twine &Indent,
2756 VPSlotTracker &SlotTracker) const override;
2757#endif
2758};
2759
2760/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2761/// instructions.
2763public:
2764 /// The blend operation is a User of the incoming values and of their
2765 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2766 /// be omitted (implied by passing an odd number of operands) in which case
2767 /// all other incoming values are merged into it.
2769 const VPIRFlags &Flags, DebugLoc DL)
2770 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2771 assert(Operands.size() >= 2 && "Expected at least two operands!");
2772 setUnderlyingValue(Phi);
2773 }
2774
2775 VPBlendRecipe *clone() override {
2777 operands(), *this, getDebugLoc());
2778 }
2779
2780 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2781
2782 /// A normalized blend is one that has an odd number of operands, whereby the
2783 /// first operand does not have an associated mask.
2784 bool isNormalized() const { return getNumOperands() % 2; }
2785
2786 /// Return the number of incoming values, taking into account when normalized
2787 /// the first incoming value will have no mask.
2788 unsigned getNumIncomingValues() const {
2789 return (getNumOperands() + isNormalized()) / 2;
2790 }
2791
2792 /// Return incoming value number \p Idx.
2793 VPValue *getIncomingValue(unsigned Idx) const {
2794 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2795 }
2796
2797 /// Return mask number \p Idx.
2798 VPValue *getMask(unsigned Idx) const {
2799 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2800 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2801 }
2802
2803 /// Set mask number \p Idx to \p V.
2804 void setMask(unsigned Idx, VPValue *V) {
2805 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2806 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2807 }
2808
2809 void execute(VPTransformState &State) override {
2810 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2811 }
2812
2813 /// Return the cost of this VPBlendRecipe.
2814 InstructionCost computeCost(ElementCount VF,
2815 VPCostContext &Ctx) const override;
2816
2817 /// Returns true if the recipe only uses the first lane of operand \p Op.
2818 bool usesFirstLaneOnly(const VPValue *Op) const override {
2820 "Op must be an operand of the recipe");
2821 // Recursing through Blend recipes only, must terminate at header phi's the
2822 // latest.
2823 return all_of(users(),
2824 [this](VPUser *U) { return U->usesFirstLaneOnly(this); });
2825 }
2826
2827protected:
2828#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2829 /// Print the recipe.
2830 void printRecipe(raw_ostream &O, const Twine &Indent,
2831 VPSlotTracker &SlotTracker) const override;
2832#endif
2833};
2834
2835/// A common base class for interleaved memory operations.
2836/// An Interleaved memory operation is a memory access method that combines
2837/// multiple strided loads/stores into a single wide load/store with shuffles.
2838/// The first operand is the start address. The optional operands are, in order,
2839/// the stored values and the mask.
2841 public VPIRMetadata {
2843
2844 /// Indicates if the interleave group is in a conditional block and requires a
2845 /// mask.
2846 bool HasMask = false;
2847
2848 /// Indicates if gaps between members of the group need to be masked out or if
2849 /// unused gaps can be loaded speculatively.
2850 bool NeedsMaskForGaps = false;
2851
2852protected:
2853 VPInterleaveBase(const unsigned char SC,
2855 ArrayRef<VPValue *> Operands,
2856 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2857 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2858 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2859 NeedsMaskForGaps(NeedsMaskForGaps) {
2860 // TODO: extend the masked interleaved-group support to reversed access.
2861 assert((!Mask || !IG->isReverse()) &&
2862 "Reversed masked interleave-group not supported.");
2863 if (StoredValues.empty()) {
2864 for (unsigned I = 0; I < IG->getFactor(); ++I)
2865 if (Instruction *Inst = IG->getMember(I)) {
2866 assert(!Inst->getType()->isVoidTy() && "must have result");
2867 new VPRecipeValue(this, Inst);
2868 }
2869 } else {
2870 for (auto *SV : StoredValues)
2871 addOperand(SV);
2872 }
2873 if (Mask) {
2874 HasMask = true;
2875 addOperand(Mask);
2876 }
2877 }
2878
2879public:
2880 VPInterleaveBase *clone() override = 0;
2881
2882 static inline bool classof(const VPRecipeBase *R) {
2883 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2884 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2885 }
2886
2887 static inline bool classof(const VPUser *U) {
2888 auto *R = dyn_cast<VPRecipeBase>(U);
2889 return R && classof(R);
2890 }
2891
2892 /// Return the address accessed by this recipe.
2893 VPValue *getAddr() const {
2894 return getOperand(0); // Address is the 1st, mandatory operand.
2895 }
2896
2897 /// Return the mask used by this recipe. Note that a full mask is represented
2898 /// by a nullptr.
2899 VPValue *getMask() const {
2900 // Mask is optional and the last operand.
2901 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2902 }
2903
2904 /// Return true if the access needs a mask because of the gaps.
2905 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2906
2908
2909 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2910
/// Not meant to be called on the base class: reaching this body hits
/// llvm_unreachable. VPInterleaveRecipe and VPInterleaveEVLRecipe declare
/// their own execute overrides.
2911 void execute(VPTransformState &State) override {
2912 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2913 }
2914
2915 /// Return the cost of this recipe.
2916 InstructionCost computeCost(ElementCount VF,
2917 VPCostContext &Ctx) const override;
2918
2919 /// Returns true if the recipe only uses the first lane of operand \p Op.
2920 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2921
2922 /// Returns the number of stored operands of this interleave group. Returns 0
2923 /// for load interleave groups.
2924 virtual unsigned getNumStoreOperands() const = 0;
2925
2926 /// Return the VPValues stored by this interleave group. If it is a load
2927 /// interleave group, return an empty ArrayRef.
2929 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2931 }
2932};
2933
2934/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2935/// or stores into one wide load/store and shuffles. The first operand of a
2936/// VPInterleave recipe is the address, followed by the stored values, followed
2937/// by an optional mask.
2939public:
2941 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2942 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2943 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2944 Mask, NeedsMaskForGaps, MD, DL) {}
2945
2946 ~VPInterleaveRecipe() override = default;
2947
2951 needsMaskForGaps(), *this, getDebugLoc());
2952 }
2953
2954 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2955
2956 /// Generate the wide load or store, and shuffles.
2957 void execute(VPTransformState &State) override;
2958
2959 bool usesFirstLaneOnly(const VPValue *Op) const override {
2961 "Op must be an operand of the recipe");
2962 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2963 }
2964
/// Returns the number of stored-value operands, i.e. all operands except the
/// address and, if getMask() is non-null, the trailing mask.
2965 unsigned getNumStoreOperands() const override {
2966 return getNumOperands() - (getMask() ? 2 : 1);
2967 }
2968
2969protected:
2970#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2971 /// Print the recipe.
2972 void printRecipe(raw_ostream &O, const Twine &Indent,
2973 VPSlotTracker &SlotTracker) const override;
2974#endif
2975};
2976
2977/// A recipe for interleaved memory operations with vector-predication
2978/// intrinsics. The first operand is the address, the second operand is the
2979/// explicit vector length. Stored values and mask are optional operands.
2981public:
2983 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2984 R.getInterleaveGroup(), {R.getAddr(), &EVL},
2985 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2986 R.getDebugLoc()) {
2987 assert(!getInterleaveGroup()->isReverse() &&
2988 "Reversed interleave-group with tail folding is not supported.");
2989 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2990 "supported for scalable vector.");
2991 }
2992
2993 ~VPInterleaveEVLRecipe() override = default;
2994
2996 llvm_unreachable("cloning not implemented yet");
2997 }
2998
2999 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3000
3001 /// The VPValue of the explicit vector length.
3002 VPValue *getEVL() const { return getOperand(1); }
3003
3004 /// Generate the wide load or store, and shuffles.
3005 void execute(VPTransformState &State) override;
3006
3007 /// The recipe only uses the first lane of the address, and EVL operand.
3008 bool usesFirstLaneOnly(const VPValue *Op) const override {
3010 "Op must be an operand of the recipe");
3011 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3012 Op == getEVL();
3013 }
3014
/// Returns the number of stored-value operands, i.e. all operands except the
/// address, the EVL and, if getMask() is non-null, the trailing mask.
3015 unsigned getNumStoreOperands() const override {
3016 return getNumOperands() - (getMask() ? 3 : 2);
3017 }
3018
3019protected:
3020#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3021 /// Print the recipe.
3022 void printRecipe(raw_ostream &O, const Twine &Indent,
3023 VPSlotTracker &SlotTracker) const override;
3024#endif
3025};
3026
3027/// A recipe to represent inloop, ordered or partial reduction operations. It
3028/// performs a reduction on a vector operand into a scalar (vector in the case
3029/// of a partial reduction) value, and adds the result to a chain. The Operands
3030/// are {ChainOp, VecOp, [Condition]}.
3032
3033 /// The recurrence kind for the reduction in question.
3034 RecurKind RdxKind;
3035 /// Whether the reduction is conditional.
3036 bool IsConditional = false;
3037 ReductionStyle Style;
3038
3039protected:
3040 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3042 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3043 ReductionStyle Style, DebugLoc DL)
3044 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3045 Style(Style) {
3046 if (CondOp) {
3047 IsConditional = true;
3048 addOperand(CondOp);
3049 }
3051 }
3052
3053public:
3055 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3057 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3058 {ChainOp, VecOp}, CondOp, Style, DL) {}
3059
3061 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3063 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3064 {ChainOp, VecOp}, CondOp, Style, DL) {}
3065
3066 ~VPReductionRecipe() override = default;
3067
3069 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3071 getCondOp(), Style, getDebugLoc());
3072 }
3073
3074 static inline bool classof(const VPRecipeBase *R) {
3075 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3076 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3077 }
3078
3079 static inline bool classof(const VPUser *U) {
3080 auto *R = dyn_cast<VPRecipeBase>(U);
3081 return R && classof(R);
3082 }
3083
/// Returns true if \p VPV has a defining recipe and that recipe is accepted
/// by the VPRecipeBase overload of classof; false if it has no defining
/// recipe.
3084 static inline bool classof(const VPValue *VPV) {
3085 const VPRecipeBase *R = VPV->getDefiningRecipe();
3086 return R && classof(R);
3087 }
3088
3089 static inline bool classof(const VPSingleDefRecipe *R) {
3090 return classof(static_cast<const VPRecipeBase *>(R));
3091 }
3092
3093 /// Generate the reduction in the loop.
3094 void execute(VPTransformState &State) override;
3095
3096 /// Return the cost of VPReductionRecipe.
3097 InstructionCost computeCost(ElementCount VF,
3098 VPCostContext &Ctx) const override;
3099
3100 /// Return the recurrence kind for the in-loop reduction.
3101 RecurKind getRecurrenceKind() const { return RdxKind; }
3102 /// Return true if the in-loop reduction is ordered, i.e. its style is
     /// RdxOrdered.
3103 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
3104 /// Return true if the in-loop reduction is conditional, i.e. a condition
     /// operand was supplied at construction.
3105 bool isConditional() const { return IsConditional; }
3106 /// Returns true if the reduction outputs a vector with a scaled down VF.
3107 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3108 /// Returns true if the reduction is in-loop. Both the RdxInLoop and the
     /// RdxOrdered styles count as in-loop.
3109 bool isInLoop() const {
3110 return std::holds_alternative<RdxInLoop>(Style) ||
3111 std::holds_alternative<RdxOrdered>(Style);
3112 }
3113 /// The VPValue of the scalar Chain being accumulated.
3114 VPValue *getChainOp() const { return getOperand(0); }
3115 /// The VPValue of the vector value to be reduced.
3116 VPValue *getVecOp() const { return getOperand(1); }
3117 /// The VPValue of the condition for the block.
3119 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3120 }
3121 /// Get the factor that the VF of this recipe's output should be scaled by, or
3122 /// 1 if it isn't scaled.
3123 unsigned getVFScaleFactor() const {
     // Only the RdxUnordered style is queried for a VFScaleFactor; any other
     // style yields 1.
3124 auto *Partial = std::get_if<RdxUnordered>(&Style);
3125 return Partial ? Partial->VFScaleFactor : 1;
3126 }
3127
3128protected:
3129#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3130 /// Print the recipe.
3131 void printRecipe(raw_ostream &O, const Twine &Indent,
3132 VPSlotTracker &SlotTracker) const override;
3133#endif
3134};
3135
3136/// A recipe to represent inloop reduction operations with vector-predication
3137/// intrinsics, performing a reduction on a vector operand with the explicit
3138/// vector length (EVL) into a scalar value, and adding the result to a chain.
3139/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3141public:
3144 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3145 R.getFastMathFlags(),
3147 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3148 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3149 DL) {}
3150
3151 ~VPReductionEVLRecipe() override = default;
3152
3154 llvm_unreachable("cloning not implemented yet");
3155 }
3156
3157 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3158
3159 /// Generate the reduction in the loop.
3160 void execute(VPTransformState &State) override;
3161
3162 /// The VPValue of the explicit vector length.
3163 VPValue *getEVL() const { return getOperand(2); }
3164
3165 /// Returns true if the recipe only uses the first lane of operand \p Op.
3166 bool usesFirstLaneOnly(const VPValue *Op) const override {
3168 "Op must be an operand of the recipe");
3169 return Op == getEVL();
3170 }
3171
3172protected:
3173#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3174 /// Print the recipe.
3175 void printRecipe(raw_ostream &O, const Twine &Indent,
3176 VPSlotTracker &SlotTracker) const override;
3177#endif
3178};
3179
3180/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3181/// copies of the original scalar type, one per lane, instead of producing a
3182/// single copy of widened type for all lanes. If the instruction is known to be
3183/// a single scalar, only one copy, per lane zero, will be generated.
3185 public VPIRMetadata {
3186 /// Indicator if only a single replica per lane is needed.
3187 bool IsSingleScalar;
3188
3189 /// Indicator if the replicas are also predicated.
3190 bool IsPredicated;
3191
3192public:
3194 bool IsSingleScalar, VPValue *Mask = nullptr,
3195 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3196 DebugLoc DL = DebugLoc::getUnknown())
3197 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3198 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3199 IsPredicated(Mask) {
3200 setUnderlyingValue(I);
3201 if (Mask)
3202 addOperand(Mask);
3203 }
3204
3205 ~VPReplicateRecipe() override = default;
3206
3208 auto *Copy = new VPReplicateRecipe(
3209 getUnderlyingInstr(), operands(), IsSingleScalar,
3210 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3211 Copy->transferFlags(*this);
3212 return Copy;
3213 }
3214
3215 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3216
3217 /// Generate replicas of the desired Ingredient. Replicas will be generated
3218 /// for all parts and lanes unless a specific part and lane are specified in
3219 /// the \p State.
3220 void execute(VPTransformState &State) override;
3221
3222 /// Return the cost of this VPReplicateRecipe.
3223 InstructionCost computeCost(ElementCount VF,
3224 VPCostContext &Ctx) const override;
3225
3226 bool isSingleScalar() const { return IsSingleScalar; }
3227
3228 bool isPredicated() const { return IsPredicated; }
3229
3230 /// Returns true if the recipe only uses the first lane of operand \p Op.
3231 bool usesFirstLaneOnly(const VPValue *Op) const override {
3233 "Op must be an operand of the recipe");
3234 return isSingleScalar();
3235 }
3236
3237 /// Returns true if the recipe uses scalars of operand \p Op.
3238 bool usesScalars(const VPValue *Op) const override {
3240 "Op must be an operand of the recipe");
3241 return true;
3242 }
3243
3244 /// Returns true if the recipe is used by a widened recipe via an intervening
3245 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3246 /// in a vector.
3247 bool shouldPack() const;
3248
3249 /// Return the mask of a predicated VPReplicateRecipe.
3251 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3252 return getOperand(getNumOperands() - 1);
3253 }
3254
3255 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3256
3257protected:
3258#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3259 /// Print the recipe.
3260 void printRecipe(raw_ostream &O, const Twine &Indent,
3261 VPSlotTracker &SlotTracker) const override;
3262#endif
3263};
3264
3265/// A recipe for generating conditional branches on the bits of a mask.
3267public:
3269 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3270
3273 }
3274
3275 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3276
3277 /// Generate the extraction of the appropriate bit from the block mask and the
3278 /// conditional branch.
3279 void execute(VPTransformState &State) override;
3280
3281 /// Return the cost of this VPBranchOnMaskRecipe.
3282 InstructionCost computeCost(ElementCount VF,
3283 VPCostContext &Ctx) const override;
3284
3285#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3286 /// Print the recipe.
3287 void printRecipe(raw_ostream &O, const Twine &Indent,
3288 VPSlotTracker &SlotTracker) const override {
3289 O << Indent << "BRANCH-ON-MASK ";
3291 }
3292#endif
3293
3294 /// Returns true if the recipe uses scalars of operand \p Op.
3295 bool usesScalars(const VPValue *Op) const override {
3297 "Op must be an operand of the recipe");
3298 return true;
3299 }
3300};
3301
3302/// A recipe to combine multiple recipes into a single 'expression' recipe,
3303/// which should be considered a single entity for cost-modeling and transforms.
3304/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3305/// expression recipes, before execute. The individual expression recipes are
3306/// completely disconnected from the def-use graph of other recipes not part of
3307/// the expression. Def-use edges between pairs of expression recipes remain
3308/// intact, whereas every edge between an expression recipe and a recipe outside
3309/// the expression is elevated to connect the non-expression recipe with the
3310/// VPExpressionRecipe itself.
3311class VPExpressionRecipe : public VPSingleDefRecipe {
3312 /// Recipes included in this VPExpressionRecipe. This could contain
3313 /// duplicates.
3314 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3315
3316 /// Temporary VPValues used for external operands of the expression, i.e.
3317 /// operands not defined by recipes in the expression.
3318 SmallVector<VPValue *> LiveInPlaceholders;
3319
3320 enum class ExpressionTypes {
3321 /// Represents an inloop extended reduction operation, performing a
3322 /// reduction on an extended vector operand into a scalar value, and adding
3323 /// the result to a chain.
3324 ExtendedReduction,
3325 /// Represent an inloop multiply-accumulate reduction, multiplying the
3326 /// extended vector operands, performing a reduction.add on the result, and
3327 /// adding the scalar result to a chain.
3328 ExtMulAccReduction,
3329 /// Represent an inloop multiply-accumulate reduction, multiplying the
3330 /// vector operands, performing a reduction.add on the result, and adding
3331 /// the scalar result to a chain.
3332 MulAccReduction,
3333 /// Represent an inloop multiply-accumulate reduction, multiplying the
3334 /// extended vector operands, negating the multiplication, performing a
3335 /// reduction.add on the result, and adding the scalar result to a chain.
3336 ExtNegatedMulAccReduction,
3337 };
3338
3339 /// Type of the expression.
3340 ExpressionTypes ExpressionType;
3341
3342 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3343 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3344 /// in the expression) are replaced by temporary VPValues and the original
3345 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3346 /// as needed (excluding last) to ensure they are only used by other recipes
3347 /// in the expression.
3348 VPExpressionRecipe(ExpressionTypes ExpressionType,
3349 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3350
3351public:
3353 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3355 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3358 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3359 {Ext0, Ext1, Mul, Red}) {}
3362 VPReductionRecipe *Red)
3363 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3364 {Ext0, Ext1, Mul, Sub, Red}) {
3365 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3366 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3367 "Expected an add reduction");
3368 assert(getNumOperands() >= 3 && "Expected at least three operands");
3369 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3370 assert(SubConst && SubConst->isZero() &&
3371 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3372 }
3373
3375 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3376 for (auto *R : reverse(ExpressionRecipes)) {
3377 if (ExpressionRecipesSeen.insert(R).second)
3378 delete R;
3379 }
3380 for (VPValue *T : LiveInPlaceholders)
3381 delete T;
3382 }
3383
3384 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3385
/// Clone this expression. Every bundled recipe is cloned, the clones are
/// rewired to use each other instead of the originals, placeholder operands
/// are pointed back at this recipe's external operands, and a new
/// VPExpressionRecipe of the same ExpressionType is built from the clones.
3386 VPExpressionRecipe *clone() override {
3387 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3388 SmallVector<VPSingleDefRecipe *> NewExpressionRecipes;
3389 for (auto *R : ExpressionRecipes)
3390 NewExpressionRecipes.push_back(R->clone());
3391 for (auto *New : NewExpressionRecipes) {
     // Redirect uses of each original expression recipe to the clone stored at
     // the same index.
3392 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3393 New->replaceUsesOfWith(Old, NewExpressionRecipes[Idx]);
3394 // Update placeholder operands in the cloned recipe to use the external
3395 // operands, to be internalized when the cloned expression is constructed.
3396 for (const auto &[Placeholder, OutsideOp] :
3397 zip(LiveInPlaceholders, operands()))
3398 New->replaceUsesOfWith(Placeholder, OutsideOp);
3399 }
3400 return new VPExpressionRecipe(ExpressionType, NewExpressionRecipes);
3401 }
3402
3403 /// Return the VPValue to use to infer the result type of the recipe.
3405 unsigned OpIdx =
3406 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3407 : 1;
3408 return getOperand(getNumOperands() - OpIdx);
3409 }
3410
3411 /// Insert the recipes of the expression back into the VPlan, directly before
3412 /// the current recipe. Leaves the expression recipe empty, which must be
3413 /// removed before codegen.
3414 void decompose();
3415
/// Returns the VF scale factor of the last recipe of the expression if that
/// recipe is a VPReductionRecipe, and 1 otherwise.
/// NOTE(review): reads ExpressionRecipes.back(), so this presumably must not
/// be called once the expression has been emptied -- confirm against callers.
3416 unsigned getVFScaleFactor() const {
3417 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3418 return PR ? PR->getVFScaleFactor() : 1;
3419 }
3420
3421 /// Method for generating code, must not be called as this recipe is abstract.
3422 void execute(VPTransformState &State) override {
3423 llvm_unreachable("recipe must be removed before execute");
3424 }
3425
3427 VPCostContext &Ctx) const override;
3428
3429 /// Returns true if this expression contains recipes that may read from or
3430 /// write to memory.
3431 bool mayReadOrWriteMemory() const;
3432
3433 /// Returns true if this expression contains recipes that may have side
3434 /// effects.
3435 bool mayHaveSideEffects() const;
3436
3437 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3438 bool isSingleScalar() const;
3439
3440protected:
3441#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3442 /// Print the recipe.
3443 void printRecipe(raw_ostream &O, const Twine &Indent,
3444 VPSlotTracker &SlotTracker) const override;
3445#endif
3446};
3447
3448/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3449/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3450/// order to merge values that are set under such a branch and feed their uses.
3451/// The phi nodes can be scalar or vector depending on the users of the value.
3452/// This recipe works in concert with VPBranchOnMaskRecipe.
3454public:
3455 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3456 /// nodes after merging back from a Branch-on-Mask.
3458 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
3459 ~VPPredInstPHIRecipe() override = default;
3460
3462 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3463 }
3464
3465 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3466
3467 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3468 /// retain SSA form.
3469 void execute(VPTransformState &State) override;
3470
3471 /// Return the cost of this VPPredInstPHIRecipe.
3473 VPCostContext &Ctx) const override {
3474 // TODO: Compute accurate cost after retiring the legacy cost model.
3475 return 0;
3476 }
3477
3478 /// Returns true if the recipe uses scalars of operand \p Op.
3479 bool usesScalars(const VPValue *Op) const override {
3481 "Op must be an operand of the recipe");
3482 return true;
3483 }
3484
3485protected:
3486#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3487 /// Print the recipe.
3488 void printRecipe(raw_ostream &O, const Twine &Indent,
3489 VPSlotTracker &SlotTracker) const override;
3490#endif
3491};
3492
3493/// A common base class for widening memory operations. An optional mask can be
3494/// provided as the last operand.
3496 public VPIRMetadata {
3497protected:
3499
3500 /// Alignment information for this memory access.
3502
3503 /// Whether the accessed addresses are consecutive.
3505
3506 /// Whether the consecutive accessed addresses are in reverse order.
3508
3509 /// Whether the memory access is masked.
3510 bool IsMasked = false;
3511
/// Append \p Mask as an additional operand and mark the recipe as masked.
/// A null \p Mask leaves the recipe unmasked. Asserts that no mask has been
/// set before; the assert runs before the null check, so it also fires for a
/// null \p Mask on an already-masked recipe.
3512 void setMask(VPValue *Mask) {
3513 assert(!IsMasked && "cannot re-set mask");
3514 if (!Mask)
3515 return;
3516 addOperand(Mask);
3517 IsMasked = true;
3518 }
3519
3520 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3521 std::initializer_list<VPValue *> Operands,
3522 bool Consecutive, bool Reverse,
3523 const VPIRMetadata &Metadata, DebugLoc DL)
3524 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3526 Reverse(Reverse) {
3527 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
3529 "Reversed acccess without VPVectorEndPointerRecipe address?");
3530 }
3531
3532public:
3534 llvm_unreachable("cloning not supported");
3535 }
3536
3537 static inline bool classof(const VPRecipeBase *R) {
3538 return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3539 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3540 R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3541 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3542 }
3543
3544 static inline bool classof(const VPUser *U) {
3545 auto *R = dyn_cast<VPRecipeBase>(U);
3546 return R && classof(R);
3547 }
3548
3549 /// Return whether the loaded-from / stored-to addresses are consecutive.
3550 bool isConsecutive() const { return Consecutive; }
3551
3552 /// Return whether the consecutive loaded/stored addresses are in reverse
3553 /// order.
3554 bool isReverse() const { return Reverse; }
3555
3556 /// Return the address accessed by this recipe.
3557 VPValue *getAddr() const { return getOperand(0); }
3558
3559 /// Returns true if the recipe is masked.
3560 bool isMasked() const { return IsMasked; }
3561
3562 /// Return the mask used by this recipe. Note that a full mask is represented
3563 /// by a nullptr.
3564 VPValue *getMask() const {
3565 // Mask is optional and therefore the last operand.
3566 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3567 }
3568
3569 /// Returns the alignment of the memory access.
3570 Align getAlign() const { return Alignment; }
3571
3572 /// Generate the wide load/store.
3573 void execute(VPTransformState &State) override {
3574 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3575 }
3576
3577 /// Return the cost of this VPWidenMemoryRecipe.
3578 InstructionCost computeCost(ElementCount VF,
3579 VPCostContext &Ctx) const override;
3580
3582};
3583
3584/// A recipe for widening load operations, using the address to load from and an
3585/// optional mask.
3587 public VPRecipeValue {
3589 bool Consecutive, bool Reverse,
3590 const VPIRMetadata &Metadata, DebugLoc DL)
3591 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3592 Consecutive, Reverse, Metadata, DL),
3593 VPRecipeValue(this, &Load) {
3594 setMask(Mask);
3595 }
3596
3599 getMask(), Consecutive, Reverse, *this,
3600 getDebugLoc());
3601 }
3602
3603 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
3604
3605 /// Generate a wide load or gather.
3606 void execute(VPTransformState &State) override;
3607
3608 /// Returns true if the recipe only uses the first lane of operand \p Op.
3609 bool usesFirstLaneOnly(const VPValue *Op) const override {
3611 "Op must be an operand of the recipe");
3612 // Widened, consecutive loads operations only demand the first lane of
3613 // their address.
3614 return Op == getAddr() && isConsecutive();
3615 }
3616
3617protected:
3618#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3619 /// Print the recipe.
3620 void printRecipe(raw_ostream &O, const Twine &Indent,
3621 VPSlotTracker &SlotTracker) const override;
3622#endif
3623};
3624
3625/// A recipe for widening load operations with vector-predication intrinsics,
3626/// using the address to load from, the explicit vector length and an optional
3627/// mask.
3629 public VPRecipeValue {
3631 VPValue *Mask)
3632 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
3633 {Addr, &EVL}, L.isConsecutive(), L.isReverse(), L,
3634 L.getDebugLoc()),
3635 VPRecipeValue(this, &getIngredient()) {
3636 setMask(Mask);
3637 }
3638
3639 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3640
3641 /// Return the EVL operand.
3642 VPValue *getEVL() const { return getOperand(1); }
3643
3644 /// Generate the wide load or gather.
3645 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3646
3647 /// Return the cost of this VPWidenLoadEVLRecipe.
3649 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3650
3651 /// Returns true if the recipe only uses the first lane of operand \p Op.
3652 bool usesFirstLaneOnly(const VPValue *Op) const override {
3654 "Op must be an operand of the recipe");
3655 // Widened loads only demand the first lane of EVL and consecutive loads
3656 // only demand the first lane of their address.
3657 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3658 }
3659
3660protected:
3661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3662 /// Print the recipe.
3663 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3664 VPSlotTracker &SlotTracker) const override;
3665#endif
3666};
3667
3668/// A recipe for widening store operations, using the stored value, the address
3669/// to store to and an optional mask.
/// Construct a widened store for \p Store. The operands are {\p Addr,
/// \p StoredVal}; \p Mask is passed to setMask() after the base is
/// initialised. \p Consecutive, \p Reverse, \p Metadata and \p DL are
/// forwarded to the VPWidenMemoryRecipe constructor.
3671 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3672 VPValue *Mask, bool Consecutive, bool Reverse,
3673 const VPIRMetadata &Metadata, DebugLoc DL)
3674 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3675 {Addr, StoredVal}, Consecutive, Reverse, Metadata,
3676 DL) {
3677 setMask(Mask);
3678 }
3679
3685
3686 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
3687
3688 /// Return the value stored by this recipe.
3689 VPValue *getStoredValue() const { return getOperand(1); }
3690
3691 /// Generate a wide store or scatter.
3692 void execute(VPTransformState &State) override;
3693
3694 /// Returns true if the recipe only uses the first lane of operand \p Op.
3695 bool usesFirstLaneOnly(const VPValue *Op) const override {
3697 "Op must be an operand of the recipe");
3698 // Widened, consecutive stores only demand the first lane of their address,
3699 // unless the same operand is also stored.
3700 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3701 }
3702
3703protected:
3704#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3705 /// Print the recipe.
3706 void printRecipe(raw_ostream &O, const Twine &Indent,
3707 VPSlotTracker &SlotTracker) const override;
3708#endif
3709};
3710
3711/// A recipe for widening store operations with vector-predication intrinsics,
3712/// using the value to store, the address to store to, the explicit vector
3713/// length and an optional mask.
3716 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3717 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
3718 {Addr, StoredVal, &EVL}, S.isConsecutive(),
3719 S.isReverse(), S, S.getDebugLoc()) {
3720 setMask(Mask);
3721 }
3722
3723 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3724
3725 /// Return the value stored by this recipe.
3726 VPValue *getStoredValue() const { return getOperand(1); }
3727
3728 /// Return the EVL operand.
3729 VPValue *getEVL() const { return getOperand(2); }
3730
3731 /// Generate the wide store or scatter.
3732 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3733
3734 /// Return the cost of this VPWidenStoreEVLRecipe.
3736 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3737
3738 /// Returns true if the recipe only uses the first lane of operand \p Op.
3739 bool usesFirstLaneOnly(const VPValue *Op) const override {
3741 "Op must be an operand of the recipe");
3742 if (Op == getEVL()) {
3743 assert(getStoredValue() != Op && "unexpected store of EVL");
3744 return true;
3745 }
3746 // Widened, consecutive memory operations only demand the first lane of
3747 // their address, unless the same operand is also stored. The latter can
3748 // happen with opaque pointers.
3749 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3750 }
3751
3752protected:
3753#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3754 /// Print the recipe.
3755 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3756 VPSlotTracker &SlotTracker) const override;
3757#endif
3758};
3759
3760/// Recipe to expand a SCEV expression.
3762 const SCEV *Expr;
3763
3764public:
3766 : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}
3767
3768 ~VPExpandSCEVRecipe() override = default;
3769
3770 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3771
3772 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3773
/// Not meant to be called: reaching this body hits llvm_unreachable, as SCEV
/// expressions must be expanded before final execute.
3774 void execute(VPTransformState &State) override {
3775 llvm_unreachable("SCEV expressions must be expanded before final execute");
3776 }
3777
3778 /// Return the cost of this VPExpandSCEVRecipe.
3780 VPCostContext &Ctx) const override {
3781 // TODO: Compute accurate cost after retiring the legacy cost model.
3782 return 0;
3783 }
3784
3785 const SCEV *getSCEV() const { return Expr; }
3786
3787protected:
3788#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3789 /// Print the recipe.
3790 void printRecipe(raw_ostream &O, const Twine &Indent,
3791 VPSlotTracker &SlotTracker) const override;
3792#endif
3793};
3794
3795/// Canonical scalar induction phi of the vector loop. Starting at the specified
3796/// start value (either 0 or the resume value when vectorizing the epilogue
3797/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3798/// canonical induction variable.
3800public:
3802 : VPHeaderPHIRecipe(VPRecipeBase::VPCanonicalIVPHISC, nullptr, StartV,
3803 DL) {}
3804
3805 ~VPCanonicalIVPHIRecipe() override = default;
3806
3809 R->addOperand(getBackedgeValue());
3810 return R;
3811 }
3812
3813 VP_CLASSOF_IMPL(VPRecipeBase::VPCanonicalIVPHISC)
3814
3815 void execute(VPTransformState &State) override {
3816 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3817 "scalar phi recipe");
3818 }
3819
3820 /// Returns the start value of the canonical induction.
3822
3823 /// Returns the scalar type of the induction.
3824 Type *getScalarType() const { return getStartValue()->getType(); }
3825
3826 /// Returns true if the recipe only uses the first lane of operand \p Op.
3827 bool usesFirstLaneOnly(const VPValue *Op) const override {
3829 "Op must be an operand of the recipe");
3830 return true;
3831 }
3832
3833 /// Returns true if the recipe only uses the first part of operand \p Op.
3834 bool usesFirstPartOnly(const VPValue *Op) const override {
3836 "Op must be an operand of the recipe");
3837 return true;
3838 }
3839
3840 /// Return the cost of this VPCanonicalIVPHIRecipe.
3842 VPCostContext &Ctx) const override {
3843 // For now, match the behavior of the legacy cost model.
3844 return 0;
3845 }
3846
3847protected:
3848#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3849 /// Print the recipe.
3850 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3851 VPSlotTracker &SlotTracker) const override;
3852#endif
3853};
3854
3855/// A recipe for generating the active lane mask for the vector loop that is
3856/// used to predicate the vector operations.
3858public:
3860 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3861 StartMask, DL) {}
3862
3863 ~VPActiveLaneMaskPHIRecipe() override = default;
3864
3867 if (getNumOperands() == 2)
3868 R->addOperand(getOperand(1));
3869 return R;
3870 }
3871
3872 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3873
3874 /// Generate the active lane mask phi of the vector loop.
3875 void execute(VPTransformState &State) override;
3876
3877protected:
3878#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3879 /// Print the recipe.
3880 void printRecipe(raw_ostream &O, const Twine &Indent,
3881 VPSlotTracker &SlotTracker) const override;
3882#endif
3883};
3884
3885/// A recipe for generating the phi node tracking the current scalar iteration
3886/// index. It starts at the start value of the canonical induction and gets
3887/// incremented by the number of scalar iterations processed by the vector loop
3888/// iteration. The increment does not have to be loop invariant.
3890public:
3892 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3893 StartIV, DL) {}
3894
3895 ~VPCurrentIterationPHIRecipe() override = default;
3896
3898 llvm_unreachable("cloning not implemented yet");
3899 }
3900
3901 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3902
3903 void execute(VPTransformState &State) override {
3904 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3905 "scalar phi recipe");
3906 }
3907
3908 /// Return the cost of this VPCurrentIterationPHIRecipe.
3910 VPCostContext &Ctx) const override {
3911 // For now, match the behavior of the legacy cost model.
3912 return 0;
3913 }
3914
3915 /// Returns true if the recipe only uses the first lane of operand \p Op.
3916 bool usesFirstLaneOnly(const VPValue *Op) const override {
3918 "Op must be an operand of the recipe");
3919 return true;
3920 }
3921
3922protected:
3923#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3924 /// Print the recipe.
3925 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3926 VPSlotTracker &SlotTracker) const override;
3927#endif
3928};
3929
3930/// A Recipe for widening the canonical induction variable of the vector loop.
3932 public VPUnrollPartAccessor<1> {
3933public:
3935 : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3936
3937 ~VPWidenCanonicalIVRecipe() override = default;
3938
3943
3944 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
3945
3946 /// Generate a canonical vector induction variable of the vector loop, with
3947 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3948 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3949 void execute(VPTransformState &State) override;
3950
3951 /// Return the cost of this VPWidenCanonicalIVRecipe.
3953 VPCostContext &Ctx) const override {
3954 // TODO: Compute accurate cost after retiring the legacy cost model.
3955 return 0;
3956 }
3957
3958protected:
3959#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3960 /// Print the recipe.
3961 void printRecipe(raw_ostream &O, const Twine &Indent,
3962 VPSlotTracker &SlotTracker) const override;
3963#endif
3964};
3965
3966/// A recipe for converting the input value \p IV value to the corresponding
3967/// value of an IV with different start and step values, using Start + IV *
3968/// Step.
3970 /// Kind of the induction.
3972 /// If not nullptr, the floating point induction binary operator. Must be set
3973 /// for floating point inductions.
3974 const FPMathOperator *FPBinOp;
3975
3976 /// Name to use for the generated IR instruction for the derived IV.
3977 std::string Name;
3978
3979public:
3981 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3982 const Twine &Name = "")
3984 IndDesc.getKind(),
3985 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3986 Start, CanonicalIV, Step, Name) {}
3987
3989 const FPMathOperator *FPBinOp, VPIRValue *Start,
3990 VPValue *IV, VPValue *Step, const Twine &Name = "")
3991 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
3992 Kind(Kind), FPBinOp(FPBinOp), Name(Name.str()) {}
3993
3994 ~VPDerivedIVRecipe() override = default;
3995
3997 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3998 getStepValue());
3999 }
4000
4001 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4002
4003 /// Generate the transformed value of the induction at offset StartValue (1.
4004 /// operand) + IV (2. operand) * StepValue (3. operand).
4005 void execute(VPTransformState &State) override;
4006
4007 /// Return the cost of this VPDerivedIVRecipe.
4009 VPCostContext &Ctx) const override {
4010 // TODO: Compute accurate cost after retiring the legacy cost model.
4011 return 0;
4012 }
4013
4014 Type *getScalarType() const { return getStartValue()->getType(); }
4015
4017 VPValue *getStepValue() const { return getOperand(2); }
4018
4019 /// Returns true if the recipe only uses the first lane of operand \p Op.
4020 bool usesFirstLaneOnly(const VPValue *Op) const override {
4022 "Op must be an operand of the recipe");
4023 return true;
4024 }
4025
4026protected:
4027#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4028 /// Print the recipe.
4029 void printRecipe(raw_ostream &O, const Twine &Indent,
4030 VPSlotTracker &SlotTracker) const override;
4031#endif
4032};
4033
4034/// A recipe for handling phi nodes of integer and floating-point inductions,
4035/// producing their scalar values. Before unrolling by UF the recipe represents
4036/// the VF*UF scalar values to be produced, or UF scalar values if only first
4037/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4038/// operand StartIndex to all unroll parts except part 0, as the recipe
4039/// represents the VF scalar values (this number of values is taken from
4040/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4042 Instruction::BinaryOps InductionOpcode;
4043
4044public:
4047 DebugLoc DL)
4048 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4049 FMFs, DL),
4050 InductionOpcode(Opcode) {}
4051
4053 VPValue *Step, VPValue *VF,
4056 IV, Step, VF, IndDesc.getInductionOpcode(),
4057 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4058 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4059 : FastMathFlags(),
4060 DL) {}
4061
4062 ~VPScalarIVStepsRecipe() override = default;
4063
4065 auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
4066 getOperand(2), InductionOpcode,
4068 if (VPValue *StartIndex = getStartIndex())
4069 NewR->setStartIndex(StartIndex);
4070 return NewR;
4071 }
4072
4073 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4074
4075 /// Generate the scalarized versions of the phi node as needed by their users.
4076 void execute(VPTransformState &State) override;
4077
4078 /// Return the cost of this VPScalarIVStepsRecipe.
4080 VPCostContext &Ctx) const override {
4081 // TODO: Compute accurate cost after retiring the legacy cost model.
4082 return 0;
4083 }
4084
4085 VPValue *getStepValue() const { return getOperand(1); }
4086
4087 /// Return the number of scalars to produce per unroll part, used to compute
4088 /// StartIndex during unrolling.
4089 VPValue *getVFValue() const { return getOperand(2); }
4090
4091 /// Return the StartIndex, or null if known to be zero, valid only after
4092 /// unrolling.
4094 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4095 }
4096
4097 /// Set or add the StartIndex operand.
4098 void setStartIndex(VPValue *StartIndex) {
4099 if (getNumOperands() == 4)
4100 setOperand(3, StartIndex);
4101 else
4102 addOperand(StartIndex);
4103 }
4104
4105 /// Returns true if the recipe only uses the first lane of operand \p Op.
4106 bool usesFirstLaneOnly(const VPValue *Op) const override {
4108 "Op must be an operand of the recipe");
4109 return true;
4110 }
4111
4112 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4113
4114protected:
4115#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4116 /// Print the recipe.
4117 void printRecipe(raw_ostream &O, const Twine &Indent,
4118 VPSlotTracker &SlotTracker) const override;
4119#endif
4120};
4121
4122/// Casting from VPRecipeBase -> VPPhiAccessors is supported for all recipe
4123/// types implementing VPPhiAccessors. Used by isa<> & co.
4125 static inline bool isPossible(const VPRecipeBase *f) {
4126 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
4128 }
4129};
4130/// Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the
4131/// recipe types implementing VPPhiAccessors. Used by cast<>, dyn_cast<> & co.
4132template <typename SrcTy>
4133struct CastInfoVPPhiAccessors : public CastIsPossible<VPPhiAccessors, SrcTy> {
4134
4136
4137 /// doCast is used by cast<>.
4138 static inline VPPhiAccessors *doCast(SrcTy R) {
4139 return const_cast<VPPhiAccessors *>([R]() -> const VPPhiAccessors * {
4140 switch (R->getVPRecipeID()) {
4141 case VPRecipeBase::VPInstructionSC:
4142 return cast<VPPhi>(R);
4143 case VPRecipeBase::VPIRInstructionSC:
4144 return cast<VPIRPhi>(R);
4145 case VPRecipeBase::VPWidenPHISC:
4146 return cast<VPWidenPHIRecipe>(R);
4147 default:
4148 return cast<VPHeaderPHIRecipe>(R);
4149 }
4150 }());
4151 }
4152
4153 /// doCastIfPossible is used by dyn_cast<>.
4154 static inline VPPhiAccessors *doCastIfPossible(SrcTy f) {
4155 if (!Self::isPossible(f))
4156 return nullptr;
4157 return doCast(f);
4158 }
4159};
4160template <>
4163template <>
4166
4167/// Casting from (const) VPRecipeBase -> (const) VPIRMetadata is supported for
4168/// all recipe types implementing VPIRMetadata. Used by isa<> & co.
4169namespace detail {
4170template <typename DstTy, typename RecipeBasePtrTy>
4171static inline auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy {
4172 switch (R->getVPRecipeID()) {
4173 case VPRecipeBase::VPInstructionSC:
4174 return cast<VPInstruction>(R);
4175 case VPRecipeBase::VPWidenSC:
4176 return cast<VPWidenRecipe>(R);
4177 case VPRecipeBase::VPWidenCastSC:
4178 return cast<VPWidenCastRecipe>(R);
4179 case VPRecipeBase::VPWidenIntrinsicSC:
4181 case VPRecipeBase::VPWidenCallSC:
4182 return cast<VPWidenCallRecipe>(R);
4183 case VPRecipeBase::VPReplicateSC:
4184 return cast<VPReplicateRecipe>(R);
4185 case VPRecipeBase::VPInterleaveSC:
4186 case VPRecipeBase::VPInterleaveEVLSC:
4187 return cast<VPInterleaveBase>(R);
4188 case VPRecipeBase::VPWidenLoadSC:
4189 case VPRecipeBase::VPWidenLoadEVLSC:
4190 case VPRecipeBase::VPWidenStoreSC:
4191 case VPRecipeBase::VPWidenStoreEVLSC:
4192 return cast<VPWidenMemoryRecipe>(R);
4193 default:
4194 llvm_unreachable("invalid recipe for VPIRMetadata cast");
4195 }
4196}
4197} // namespace detail
4198
4199/// Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the
4200/// recipe types implementing VPIRMetadata. Used by cast<>, dyn_cast<> & co.
4201template <typename DstTy, typename SrcTy>
4202struct CastInfoVPIRMetadata : public CastIsPossible<DstTy, SrcTy> {
4203 static inline bool isPossible(SrcTy R) {
4204 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here and
4205 // also handled in castToVPIRMetadata.
4210 R);
4211 }
4212
4213 using RetTy = DstTy *;
4214
4215 /// doCast is used by cast<>.
4216 static inline RetTy doCast(SrcTy R) {
4218 }
4219
4220 /// doCastIfPossible is used by dyn_cast<>.
4221 static inline RetTy doCastIfPossible(SrcTy R) {
4222 if (!isPossible(R))
4223 return nullptr;
4224 return doCast(R);
4225 }
4226};
4227template <>
4230template <>
4233
4234/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4235/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4236/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4237class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4238 friend class VPlan;
4239
4240 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4241 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4242 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4243 if (Recipe)
4244 appendRecipe(Recipe);
4245 }
4246
4247public:
4249
4250protected:
4251 /// The VPRecipes held in the order of output instructions to generate.
4253
4254 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4255 : VPBlockBase(BlockSC, Name.str()) {}
4256
4257public:
4258 ~VPBasicBlock() override {
4259 while (!Recipes.empty())
4260 Recipes.pop_back();
4261 }
4262
4263 /// Instruction iterators...
4268
4269 //===--------------------------------------------------------------------===//
4270 /// Recipe iterator methods
4271 ///
4272 inline iterator begin() { return Recipes.begin(); }
4273 inline const_iterator begin() const { return Recipes.begin(); }
4274 inline iterator end() { return Recipes.end(); }
4275 inline const_iterator end() const { return Recipes.end(); }
4276
4277 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4278 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4279 inline reverse_iterator rend() { return Recipes.rend(); }
4280 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4281
4282 inline size_t size() const { return Recipes.size(); }
4283 inline bool empty() const { return Recipes.empty(); }
4284 inline const VPRecipeBase &front() const { return Recipes.front(); }
4285 inline VPRecipeBase &front() { return Recipes.front(); }
4286 inline const VPRecipeBase &back() const { return Recipes.back(); }
4287 inline VPRecipeBase &back() { return Recipes.back(); }
4288
4289 /// Returns a reference to the list of recipes.
4291
4292 /// Returns a pointer to a member of the recipe list.
4293 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4294 return &VPBasicBlock::Recipes;
4295 }
4296
4297 /// Method to support type inquiry through isa, cast, and dyn_cast.
4298 static inline bool classof(const VPBlockBase *V) {
4299 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4300 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4301 }
4302
4303 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4304 assert(Recipe && "No recipe to append.");
4305 assert(!Recipe->Parent && "Recipe already in VPlan");
4306 Recipe->Parent = this;
4307 Recipes.insert(InsertPt, Recipe);
4308 }
4309
4310 /// Augment the existing recipes of a VPBasicBlock with an additional
4311 /// \p Recipe as the last recipe.
4312 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4313
4314 /// The method which generates the output IR instructions that correspond to
4315 /// this VPBasicBlock, thereby "executing" the VPlan.
4316 void execute(VPTransformState *State) override;
4317
4318 /// Return the cost of this VPBasicBlock.
4319 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4320
4321 /// Return the position of the first non-phi node recipe in the block.
4322 iterator getFirstNonPhi();
4323
4324 /// Returns an iterator range over the PHI-like recipes in the block.
4328
4329 /// Split current block at \p SplitAt by inserting a new block between the
4330 /// current block and its successors and moving all recipes starting at
4331 /// SplitAt to the new block. Returns the new block.
4332 VPBasicBlock *splitAt(iterator SplitAt);
4333
4334 VPRegionBlock *getEnclosingLoopRegion();
4335 const VPRegionBlock *getEnclosingLoopRegion() const;
4336
4337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4338 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4339 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4340 ///
4341 /// Note that the numbering is applied to the whole VPlan, so printing
4342 /// individual blocks is consistent with the whole VPlan printing.
4343 void print(raw_ostream &O, const Twine &Indent,
4344 VPSlotTracker &SlotTracker) const override;
4345 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4346#endif
4347
4348 /// If the block has multiple successors, return the branch recipe terminating
4349 /// the block. If there are no or only a single successor, return nullptr;
4350 VPRecipeBase *getTerminator();
4351 const VPRecipeBase *getTerminator() const;
4352
4353 /// Returns true if the block is exiting its parent region.
4354 bool isExiting() const;
4355
4356 /// Clone the current block and its recipes, without updating the operands of
4357 /// the cloned recipes.
4358 VPBasicBlock *clone() override;
4359
4360 /// Returns the predecessor block at index \p Idx with the predecessors as per
4361 /// the corresponding plain CFG. If the block is an entry block to a region,
4362 /// the first predecessor is the single predecessor of a region, and the
4363 /// second predecessor is the exiting block of the region.
4364 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4365
4366protected:
4367 /// Execute the recipes in the IR basic block \p BB.
4368 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4369
4370 /// Connect the predecessors of this VPBB in the VPlan CFG to the IR basic
4371 /// block generated for this VPBB.
4372 void connectToPredecessors(VPTransformState &State);
4373
4374private:
4375 /// Create an IR BasicBlock to hold the output instructions generated by this
4376 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4377 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4378};
4379
4380inline const VPBasicBlock *
4382 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4383}
4384
4385/// A special type of VPBasicBlock that wraps an existing IR basic block.
4386/// Recipes of the block get added before the first non-phi instruction in the
4387/// wrapped block.
4388/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4389/// preheader block.
4390class VPIRBasicBlock : public VPBasicBlock {
4391 friend class VPlan;
4392
4393 BasicBlock *IRBB;
4394
4395 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4396 VPIRBasicBlock(BasicBlock *IRBB)
4397 : VPBasicBlock(VPIRBasicBlockSC,
4398 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4399 IRBB(IRBB) {}
4400
4401public:
4402 ~VPIRBasicBlock() override = default;
4403
4404 static inline bool classof(const VPBlockBase *V) {
4405 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4406 }
4407
4408 /// The method which generates the output IR instructions that correspond to
4409 /// this VPBasicBlock, thereby "executing" the VPlan.
4410 void execute(VPTransformState *State) override;
4411
4412 VPIRBasicBlock *clone() override;
4413
4414 BasicBlock *getIRBasicBlock() const { return IRBB; }
4415};
4416
4417/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4418/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4419/// A VPRegionBlock may indicate that its contents are to be replicated several
4420/// times. This is designed to support predicated scalarization, in which a
4421/// scalar if-then code structure needs to be generated VF * UF times. Having
4422/// this replication indicator helps to keep a single model for multiple
4423/// candidate VF's. The actual replication takes place only once the desired VF
4424/// and UF have been determined.
4425class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4426 friend class VPlan;
4427
4428 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4429 VPBlockBase *Entry;
4430
4431 /// Hold the Single Exiting block of the SESE region modelled by the
4432 /// VPRegionBlock.
4433 VPBlockBase *Exiting;
4434
4435 /// An indicator whether this region is to generate multiple replicated
4436 /// instances of output IR corresponding to its VPBlockBases.
4437 bool IsReplicator;
4438
4439 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
4440 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4441 const std::string &Name = "", bool IsReplicator = false)
4442 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
4443 IsReplicator(IsReplicator) {
4444 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
4445 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
4446 Entry->setParent(this);
4447 Exiting->setParent(this);
4448 }
4449 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
4450 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
4451 IsReplicator(IsReplicator) {}
4452
4453public:
4454 ~VPRegionBlock() override = default;
4455
4456 /// Method to support type inquiry through isa, cast, and dyn_cast.
4457 static inline bool classof(const VPBlockBase *V) {
4458 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4459 }
4460
4461 const VPBlockBase *getEntry() const { return Entry; }
4462 VPBlockBase *getEntry() { return Entry; }
4463
4464 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4465 /// EntryBlock must have no predecessors.
4466 void setEntry(VPBlockBase *EntryBlock) {
4467 assert(EntryBlock->getPredecessors().empty() &&
4468 "Entry block cannot have predecessors.");
4469 Entry = EntryBlock;
4470 EntryBlock->setParent(this);
4471 }
4472
4473 const VPBlockBase *getExiting() const { return Exiting; }
4474 VPBlockBase *getExiting() { return Exiting; }
4475
4476 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4477 /// ExitingBlock must have no successors.
4478 void setExiting(VPBlockBase *ExitingBlock) {
4479 assert(ExitingBlock->getSuccessors().empty() &&
4480 "Exit block cannot have successors.");
4481 Exiting = ExitingBlock;
4482 ExitingBlock->setParent(this);
4483 }
4484
4485 /// Returns the pre-header VPBasicBlock of the loop region.
4487 assert(!isReplicator() && "should only get pre-header of loop regions");
4488 return getSinglePredecessor()->getExitingBasicBlock();
4489 }
4490
4491 /// An indicator whether this region is to generate multiple replicated
4492 /// instances of output IR corresponding to its VPBlockBases.
4493 bool isReplicator() const { return IsReplicator; }
4494
4495 /// The method which generates the output IR instructions that correspond to
4496 /// this VPRegionBlock, thereby "executing" the VPlan.
4497 void execute(VPTransformState *State) override;
4498
4499 /// Return the cost of this region.
4500 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4501
4502#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4503 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4504 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4505 /// consecutive numbers.
4506 ///
4507 /// Note that the numbering is applied to the whole VPlan, so printing
4508 /// individual regions is consistent with the whole VPlan printing.
4509 void print(raw_ostream &O, const Twine &Indent,
4510 VPSlotTracker &SlotTracker) const override;
4511 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4512#endif
4513
4514 /// Clone all blocks in the single-entry single-exit region of the block and
4515 /// their recipes without updating the operands of the cloned recipes.
4516 VPRegionBlock *clone() override;
4517
4518 /// Remove the current region from its VPlan, connecting its predecessor to
4519 /// its entry, and its exiting block to its successor.
4520 void dissolveToCFGLoop();
4521
4522 /// Returns the canonical induction recipe of the region.
4524 VPBasicBlock *EntryVPBB = getEntryBasicBlock();
4525 if (EntryVPBB->empty()) {
4526 // VPlan native path. TODO: Unify both code paths.
4527 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4528 }
4529 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4530 }
4532 return const_cast<VPRegionBlock *>(this)->getCanonicalIV();
4533 }
4534
4535 /// Return the type of the canonical IV for loop regions.
4536 Type *getCanonicalIVType() { return getCanonicalIV()->getScalarType(); }
4537 const Type *getCanonicalIVType() const {
4538 return getCanonicalIV()->getScalarType();
4539 }
4540};
4541
4543 return getParent()->getParent();
4544}
4545
4547 return getParent()->getParent();
4548}
4549
4550 /// VPlan models a candidate for vectorization, encoding various decisions taken
4551/// to produce efficient output IR, including which branches, basic-blocks and
4552/// output IR instructions to generate, and their cost. VPlan holds a
4553/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4554/// VPBasicBlock.
4555class VPlan {
4556 friend class VPlanPrinter;
4557 friend class VPSlotTracker;
4558
4559 /// VPBasicBlock corresponding to the original preheader. Used to place
4560 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4561 /// rest of VPlan execution.
4562 /// When this VPlan is used for the epilogue vector loop, the entry will be
4563 /// replaced by a new entry block created during skeleton creation.
4564 VPBasicBlock *Entry;
4565
4566 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4567 VPIRBasicBlock *ScalarHeader;
4568
4569 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4570 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4571 /// e.g. if the scalar epilogue always executes.
4573
4574 /// Holds the VFs applicable to this VPlan.
4576
4577 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4578 /// any UF.
4580
4581 /// Holds the name of the VPlan, for printing.
4582 std::string Name;
4583
4584 /// Represents the trip count of the original loop, for folding
4585 /// the tail.
4586 VPValue *TripCount = nullptr;
4587
4588 /// Represents the backedge taken count of the original loop, for folding
4589 /// the tail. It equals TripCount - 1.
4590 VPSymbolicValue *BackedgeTakenCount = nullptr;
4591
4592 /// Represents the vector trip count.
4593 VPSymbolicValue VectorTripCount;
4594
4595 /// Represents the vectorization factor of the loop.
4596 VPSymbolicValue VF;
4597
4598 /// Represents the unroll factor of the loop.
4599 VPSymbolicValue UF;
4600
4601 /// Represents the loop-invariant VF * UF of the vector loop region.
4602 VPSymbolicValue VFxUF;
4603
4604 /// Contains all the external definitions created for this VPlan, as a mapping
4605 /// from IR Values to VPIRValues.
4607
4608 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4609 /// VPlan is destroyed.
4610 SmallVector<VPBlockBase *> CreatedBlocks;
4611
4612 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4613 /// wrapping the original header of the scalar loop.
4614 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4615 : Entry(Entry), ScalarHeader(ScalarHeader) {
4616 Entry->setPlan(this);
4617 assert(ScalarHeader->getNumSuccessors() == 0 &&
4618 "scalar header must be a leaf node");
4619 }
4620
4621public:
4622 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4623 /// original preheader and scalar header of \p L, to be used as entry and
4624 /// scalar header blocks of the new VPlan.
4625 VPlan(Loop *L);
4626
4627 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4628 /// wrapping \p ScalarHeaderBB.
4629 VPlan(BasicBlock *ScalarHeaderBB) {
4630 setEntry(createVPBasicBlock("preheader"));
4631 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4632 }
4633
4635
4637 Entry = VPBB;
4638 VPBB->setPlan(this);
4639 }
4640
4641 /// Generate the IR code for this VPlan.
4642 void execute(VPTransformState *State);
4643
4644 /// Return the cost of this plan.
4646
4647 VPBasicBlock *getEntry() { return Entry; }
4648 const VPBasicBlock *getEntry() const { return Entry; }
4649
4650 /// Returns the preheader of the vector loop region, if one exists, or null
4651 /// otherwise.
4653 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4654 return VectorRegion
4655 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4656 : nullptr;
4657 }
4658
4659 /// Returns the VPRegionBlock of the vector loop.
4662
4663 /// Returns the 'middle' block of the plan, that is the block that selects
4664 /// whether to execute the scalar tail loop or the exit block from the loop
4665 /// latch. If there is an early exit from the vector loop, the middle block
4666 /// conceptually has the early exit block as third successor, split across 2
4667 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4668 /// tail loop or the exit block. If the scalar tail loop or exit block are
4669 /// known to always execute, the middle block may branch directly to that
4670 /// block. This function cannot be called once the vector loop region has been
4671 /// removed.
4673 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4674 assert(
4675 LoopRegion &&
4676 "cannot call the function after vector loop region has been removed");
4677 // The middle block is always the last successor of the region.
4678 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4679 }
4680
4682 return const_cast<VPlan *>(this)->getMiddleBlock();
4683 }
4684
4685 /// Return the VPBasicBlock for the preheader of the scalar loop.
4687 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
4688 }
4689
4690 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4691 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4692
4693 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4694 /// the original scalar loop.
4695 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4696
4697 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4698 /// exit block.
4700
4701 /// Returns true if \p VPBB is an exit block.
4702 bool isExitBlock(VPBlockBase *VPBB);
4703
4704 /// The trip count of the original loop.
4706 assert(TripCount && "trip count needs to be set before accessing it");
4707 return TripCount;
4708 }
4709
4710 /// Set the trip count assuming it is currently null; if it is not - use
4711 /// resetTripCount().
4712 void setTripCount(VPValue *NewTripCount) {
4713 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4714 TripCount = NewTripCount;
4715 }
4716
4717 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4718 /// the original trip count have been replaced.
4719 void resetTripCount(VPValue *NewTripCount) {
4720 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4721 "TripCount must be set when resetting");
4722 TripCount = NewTripCount;
4723 }
4724
4725 /// The backedge taken count of the original loop.
4727 if (!BackedgeTakenCount)
4728 BackedgeTakenCount = new VPSymbolicValue();
4729 return BackedgeTakenCount;
4730 }
4731 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4732
4733 /// The vector trip count.
4734 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4735
4736 /// Returns the VF of the vector loop region.
4737 VPValue &getVF() { return VF; };
4738 const VPValue &getVF() const { return VF; };
4739
4740 /// Returns the UF of the vector loop region.
4741 VPValue &getUF() { return UF; };
4742
4743 /// Returns VF * UF of the vector loop region.
4744 VPValue &getVFxUF() { return VFxUF; }
4745
4748 }
4749
4750 void addVF(ElementCount VF) { VFs.insert(VF); }
4751
4753 assert(hasVF(VF) && "Cannot set VF not already in plan");
4754 VFs.clear();
4755 VFs.insert(VF);
4756 }
4757
4758 /// Remove \p VF from the plan.
4760 assert(hasVF(VF) && "tried to remove VF not present in plan");
4761 VFs.remove(VF);
4762 }
4763
4764 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4765 bool hasScalableVF() const {
4766 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4767 }
4768
4769 /// Returns an iterator range over all VFs of the plan.
4772 return VFs;
4773 }
4774
4775 bool hasScalarVFOnly() const {
4776 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4777 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4778 "Plan with scalar VF should only have a single VF");
4779 return HasScalarVFOnly;
4780 }
4781
4782 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4783
4784 /// Returns the concrete UF of the plan, after unrolling.
4785 unsigned getConcreteUF() const {
4786 assert(UFs.size() == 1 && "Expected a single UF");
4787 return UFs[0];
4788 }
4789
4790 void setUF(unsigned UF) {
4791 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4792 UFs.clear();
4793 UFs.insert(UF);
4794 }
4795
4796 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4797 /// concrete UF.
4798 bool isUnrolled() const { return UFs.size() == 1; }
4799
4800 /// Return a string with the name of the plan and the applicable VFs and UFs.
4801 std::string getName() const;
4802
4803 void setName(const Twine &newName) { Name = newName.str(); }
4804
4805 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4806 /// yet) for \p V.
4808 assert(V && "Trying to get or add the VPIRValue of a null Value");
4809 auto [It, Inserted] = LiveIns.try_emplace(V);
4810 if (Inserted) {
4811 if (auto *CI = dyn_cast<ConstantInt>(V))
4812 It->second = new VPConstantInt(CI);
4813 else
4814 It->second = new VPIRValue(V);
4815 }
4816
4817 assert(isa<VPIRValue>(It->second) &&
4818 "Only VPIRValues should be in mapping");
4819 return It->second;
4820 }
4822 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4823 return getOrAddLiveIn(V->getValue());
4824 }
4825
4826 /// Return a VPIRValue wrapping i1 true.
4827 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4828
4829 /// Return a VPIRValue wrapping i1 false.
4830 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4831
4832 /// Return a VPIRValue wrapping the null value of type \p Ty.
4833 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4834
4835 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4837 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4838 }
4839
4840 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4841 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4842 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4843 }
4844
4845 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4846 /// value.
4848 bool IsSigned = false) {
4849 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4850 }
4851
4852 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4854 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4855 }
4856
4857 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4858 /// otherwise.
4859 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4860
4861 /// Return the list of live-in VPValues available in the VPlan.
4862 auto getLiveIns() const { return LiveIns.values(); }
4863
4864#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4865 /// Print the live-ins of this VPlan to \p O.
4866 void printLiveIns(raw_ostream &O) const;
4867
4868 /// Print this VPlan to \p O.
4869 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4870
4871 /// Print this VPlan in DOT format to \p O.
4872 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4873
4874 /// Dump the plan to stderr (for debugging).
4875 LLVM_DUMP_METHOD void dump() const;
4876#endif
4877
4878 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4879 /// recipes to refer to the clones, and return it.
4881
4882 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4883 /// present. The returned block is owned by the VPlan and deleted once the
4884 /// VPlan is destroyed.
4886 VPRecipeBase *Recipe = nullptr) {
4887 auto *VPB = new VPBasicBlock(Name, Recipe);
4888 CreatedBlocks.push_back(VPB);
4889 return VPB;
4890 }
4891
4892 /// Create a new loop region with \p Name and entry and exiting blocks set
4893 /// to \p Entry and \p Exiting respectively, if set. The returned block is
4894 /// owned by the VPlan and deleted once the VPlan is destroyed.
4895 VPRegionBlock *createLoopRegion(const std::string &Name = "",
4896 VPBlockBase *Entry = nullptr,
4897 VPBlockBase *Exiting = nullptr) {
4898 auto *VPB = Entry ? new VPRegionBlock(Entry, Exiting, Name)
4899 : new VPRegionBlock(Name);
4900 CreatedBlocks.push_back(VPB);
4901 return VPB;
4902 }
4903
4904 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4905 /// returned block is owned by the VPlan and deleted once the VPlan is
4906 /// destroyed.
4908 const std::string &Name = "") {
4909 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, true);
4910 CreatedBlocks.push_back(VPB);
4911 return VPB;
4912 }
4913
4914 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4915 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4916 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4918
4919 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4920 /// instructions in \p IRBB, except its terminator which is managed by the
4921 /// successors of the block in VPlan. The returned block is owned by the VPlan
4922 /// and deleted once the VPlan is destroyed.
4924
4925 /// Returns true if the VPlan is based on a loop with an early exit. That is
4926 /// the case if the VPlan has either more than one exit block or a single exit
4927 /// block with multiple predecessors (one for the exit via the latch and one
4928 /// via the other early exit).
4929 bool hasEarlyExit() const {
4930 return count_if(ExitBlocks,
4931 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4932 1 ||
4933 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4934 }
4935
4936 /// Returns true if the scalar tail may execute after the vector loop. Note
4937 /// that this relies on unneeded branches to the scalar tail loop being
4938 /// removed.
4939 bool hasScalarTail() const {
4940 return !(!getScalarPreheader()->hasPredecessors() ||
4942 }
4943};
4944
4945#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4946inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4947 Plan.print(OS);
4948 return OS;
4949}
4950#endif
4951
4952} // end namespace llvm
4953
4954#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
iv users
Definition IVUsers.cpp:48
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:570
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:128
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3865
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3859
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4237
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4265
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4312
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4267
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4264
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4290
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4248
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4254
iterator end()
Definition VPlan.h:4274
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4272
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4266
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4325
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:791
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
~VPBasicBlock() override
Definition VPlan.h:4258
const_reverse_iterator rbegin() const
Definition VPlan.h:4278
reverse_iterator rend()
Definition VPlan.h:4279
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4252
VPRecipeBase & back()
Definition VPlan.h:4287
const VPRecipeBase & front() const
Definition VPlan.h:4284
const_iterator begin() const
Definition VPlan.h:4273
VPRecipeBase & front()
Definition VPlan.h:4285
const VPRecipeBase & back() const
Definition VPlan.h:4286
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4303
bool empty() const
Definition VPlan.h:4283
const_iterator end() const
Definition VPlan.h:4275
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4298
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4293
reverse_iterator rbegin()
Definition VPlan.h:4277
friend class VPlan
Definition VPlan.h:4238
size_t size() const
Definition VPlan.h:4282
const_reverse_iterator rend() const
Definition VPlan.h:4280
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2793
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2798
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2768
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2788
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2809
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2818
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2775
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2804
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2784
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:82
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:301
VPRegionBlock * getParent()
Definition VPlan.h:174
VPBlocksTy & getPredecessors()
Definition VPlan.h:206
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:203
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:371
void setName(const Twine &newName)
Definition VPlan.h:167
size_t getNumSuccessors() const
Definition VPlan.h:220
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:202
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:224
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:323
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:666
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:161
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:259
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:336
size_t getNumPredecessors() const
Definition VPlan.h:221
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:292
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:224
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:329
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:205
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:159
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:196
const VPRegionBlock * getParent() const
Definition VPlan.h:175
const std::string & getName() const
Definition VPlan.h:165
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:311
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:249
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:283
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:216
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:243
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:308
friend class VPBlockUtils
Definition VPlan.h:83
unsigned getVPBlockID() const
Definition VPlan.h:172
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:350
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:315
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:151
VPBlocksTy & getSuccessors()
Definition VPlan.h:200
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:216
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:272
void setParent(VPRegionBlock *P)
Definition VPlan.h:185
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:265
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:210
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:199
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3287
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3271
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3295
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3268
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3799
~VPCanonicalIVPHIRecipe() override=default
VPCanonicalIVPHIRecipe(VPIRValue *StartV, DebugLoc DL)
Definition VPlan.h:3801
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3827
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3807
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:3834
VPIRValue * getStartValue() const
Returns the start value of the canonical induction.
Definition VPlan.h:3821
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:3824
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3815
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition VPlan.h:3841
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3897
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3891
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3909
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3903
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3916
~VPCurrentIterationPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPIRValue * getStartValue() const
Definition VPlan.h:4016
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:4008
VPValue * getStepValue() const
Definition VPlan.h:4017
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3980
Type * getScalarType() const
Definition VPlan.h:4014
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3996
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4020
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3988
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3774
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3779
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3765
const SCEV * getSCEV() const
Definition VPlan.h:3785
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3770
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3422
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3404
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3386
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3374
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3360
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3352
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3356
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3416
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3354
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2276
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2280
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2293
static bool classof(const VPValue *V)
Definition VPlan.h:2290
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2316
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2321
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2305
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2313
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2286
VPValue * getStartValue() const
Definition VPlan.h:2308
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2325
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2028
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2045
unsigned getOpcode() const
Definition VPlan.h:2041
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2021
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4390
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:461
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4414
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4404
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4391
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:486
Class to record and manage LLVM IR flags.
Definition VPlan.h:672
FastMathFlagsTy FMFs
Definition VPlan.h:760
ReductionFlagsTy ReductionFlags
Definition VPlan.h:762
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:853
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:833
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:819
WrapFlagsTy WrapFlags
Definition VPlan.h:754
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:812
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:977
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1027
TruncFlagsTy TruncFlags
Definition VPlan.h:755
CmpInst::Predicate getPredicate() const
Definition VPlan.h:949
uint8_t AllFlags[2]
Definition VPlan.h:763
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:985
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:858
ExactFlagsTy ExactFlags
Definition VPlan.h:757
bool hasNoSignedWrap() const
Definition VPlan.h:1004
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1015
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:824
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:829
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:838
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:807
uint8_t GEPFlagsStorage
Definition VPlan.h:758
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:843
bool isNonNeg() const
Definition VPlan.h:987
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:967
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:972
DisjointFlagsTy DisjointFlags
Definition VPlan.h:756
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:957
bool hasNoUnsignedWrap() const
Definition VPlan.h:993
FCmpFlagsTy FCmpFlags
Definition VPlan.h:761
NonNegFlagsTy NonNegFlags
Definition VPlan.h:759
bool isReductionInLoop() const
Definition VPlan.h:1033
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:869
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:906
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:848
uint8_t CmpPredStorage
Definition VPlan.h:753
RecurKind getRecurKind() const
Definition VPlan.h:1021
VPIRFlags(Instruction &I)
Definition VPlan.h:769
Instruction & getInstruction() const
Definition VPlan.h:1687
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1695
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1674
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1701
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1689
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1662
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1138
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1174
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1146
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1158
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1492
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1534
static bool classof(const VPUser *R)
Definition VPlan.h:1519
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1500
Type * getResultType() const
Definition VPlan.h:1540
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1523
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1193
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1424
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1444
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1365
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1304
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1295
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1311
@ ComputeAnyOfResult
Compute the final result of an AnyOf reduction with select(cmp(),x,y), where one of (x,...
Definition VPlan.h:1240
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1285
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1298
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1237
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1289
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1232
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1229
@ VScale
Returns the value for vscale.
Definition VPlan.h:1307
@ CanonicalIVIncrementForPart
Definition VPlan.h:1213
bool hasResult() const
Definition VPlan.h:1389
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1447
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1429
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1469
unsigned getOpcode() const
Definition VPlan.h:1373
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1472
friend class VPlanSlp
Definition VPlan.h:1194
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1438
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1414
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2905
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2911
static bool classof(const VPUser *U)
Definition VPlan.h:2887
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2853
Instruction * getInsertPos() const
Definition VPlan.h:2909
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2882
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2907
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2899
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2928
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2893
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2980
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3008
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3002
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3015
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2995
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2982
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2938
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2965
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2948
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2959
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2940
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1552
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1581
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1576
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4381
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1601
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1561
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1586
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1590
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3479
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3461
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3472
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3457
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:388
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:532
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4542
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:543
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:463
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:537
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:512
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:390
const VPBasicBlock * getParent() const
Definition VPlan.h:464
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:517
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:509
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCanonicalIVPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCanonicalIVPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:406
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:453
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:232
LLVM_ABI_FOR_TEST VPRecipeValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:143
friend class VPValue
Definition VPlanValue.h:233
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3163
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3142
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3166
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3153
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2729
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2715
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2694
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2708
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2741
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2723
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2682
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2732
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2746
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2738
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2726
A recipe to represent in-loop, ordered or partial reduction operations.
Definition VPlan.h:3031
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3040
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3105
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3074
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3089
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3116
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3118
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3101
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3054
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3103
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3060
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3107
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3114
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3109
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3068
static bool classof(const VPUser *U)
Definition VPlan.h:3079
static bool classof(const VPValue *VPV)
Definition VPlan.h:3084
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3123
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4425
const VPBlockBase * getEntry() const
Definition VPlan.h:4461
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4536
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4493
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4478
VPBlockBase * getExiting()
Definition VPlan.h:4474
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4523
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4466
const Type * getCanonicalIVType() const
Definition VPlan.h:4537
const VPBlockBase * getExiting() const
Definition VPlan.h:4473
VPBlockBase * getEntry()
Definition VPlan.h:4462
const VPCanonicalIVPHIRecipe * getCanonicalIV() const
Definition VPlan.h:4531
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4486
friend class VPlan
Definition VPlan.h:4426
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4457
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3185
bool isSingleScalar() const
Definition VPlan.h:3226
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3193
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3238
bool isPredicated() const
Definition VPlan.h:3228
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3207
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3231
unsigned getOpcode() const
Definition VPlan.h:3255
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3250
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4112
VPValue * getStepValue() const
Definition VPlan.h:4085
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4079
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4052
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4098
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4064
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4093
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4089
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4045
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4106
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:589
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:595
static bool classof(const VPValue *V)
Definition VPlan.h:644
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:657
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:599
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:660
static bool classof(const VPUser *U)
Definition VPlan.h:649
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:591
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1126
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:258
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1451
operand_range operands()
Definition VPlanValue.h:326
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:302
unsigned getNumOperands() const
Definition VPlanValue.h:296
operand_iterator op_end()
Definition VPlanValue.h:324
operand_iterator op_begin()
Definition VPlanValue.h:322
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:297
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:277
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:320
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:319
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:137
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
friend class VPRecipeValue
Definition VPlanValue.h:50
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:71
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:172
unsigned getNumUsers() const
Definition VPlanValue.h:104
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2154
VPValue * getVFValue() const
Definition VPlan.h:2143
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2140
int64_t getStride() const
Definition VPlan.h:2141
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2175
VPValue * getOffset() const
Definition VPlan.h:2144
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2168
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2130
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2161
VPValue * getPointer() const
Definition VPlan.h:2142
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2212
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2214
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2221
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2199
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2237
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2228
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1960
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1967
const_operand_range args() const
Definition VPlan.h:2001
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1982
operand_range args()
Definition VPlan.h:2000
Function * getCalledScalarFunction() const
Definition VPlan.h:1996
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3952
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3939
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition VPlan.h:3934
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1808
Instruction::CastOps getOpcode() const
Definition VPlan.h:1846
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1849
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1816
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1831
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2092
Type * getSourceElementType() const
Definition VPlan.h:2097
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2100
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2084
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2070
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2408
static bool classof(const VPValue *V)
Definition VPlan.h:2356
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2375
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2393
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2368
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2383
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2386
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2344
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2371
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2391
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2400
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2351
const VPValue * getVFValue() const
Definition VPlan.h:2378
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2361
const VPValue * getStepValue() const
Definition VPlan.h:2372
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2469
const TruncInst * getTruncInst() const
Definition VPlan.h:2485
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2463
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2473
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2455
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2429
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2484
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2438
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2500
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2480
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2493
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1860
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1891
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1931
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1940
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1877
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1946
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1912
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1943
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1934
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3510
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition VPlan.h:3507
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3550
static bool classof(const VPUser *U)
Definition VPlan.h:3544
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3573
Instruction & Ingredient
Definition VPlan.h:3498
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3533
Instruction & getIngredient() const
Definition VPlan.h:3581
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3504
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3537
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3564
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3501
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3560
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3520
void setMask(VPValue *Mask)
Definition VPlan.h:3512
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3570
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3557
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition VPlan.h:3554
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2600
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2565
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2573
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2527
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2536
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2517
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1752
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1772
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1799
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1756
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1764
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1789
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4555
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4859
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1139
friend class VPSlotTracker
Definition VPlan.h:4557
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1115
bool hasVF(ElementCount VF) const
Definition VPlan.h:4764
LLVMContext & getContext() const
Definition VPlan.h:4746
VPBasicBlock * getEntry()
Definition VPlan.h:4647
void setName(const Twine &newName)
Definition VPlan.h:4803
bool hasScalableVF() const
Definition VPlan.h:4765
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4744
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4737
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4705
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4726
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4771
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:910
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:888
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4821
const VPValue & getVF() const
Definition VPlan.h:4738
VPValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4741
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:918
const VPBasicBlock * getEntry() const
Definition VPlan.h:4648
friend class VPlanPrinter
Definition VPlan.h:4556
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4830
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4853
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4836
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4907
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1251
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4862
bool hasUF(unsigned UF) const
Definition VPlan.h:4782
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4695
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4734
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4731
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4807
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4833
void setVF(ElementCount VF)
Definition VPlan.h:4752
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4798
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1038
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4929
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1020
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4785
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4847
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4681
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not, use resetTripCount().
Definition VPlan.h:4712
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4719
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4672
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4636
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4885
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1257
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4759
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4827
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4895
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1145
bool hasScalarVFOnly() const
Definition VPlan.h:4775
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4686
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:928
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1098
void addVF(ElementCount VF)
Definition VPlan.h:4750
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4691
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1054
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4652
void setUF(unsigned UF)
Definition VPlan.h:4790
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4939
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1186
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4629
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4841
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2507
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
static auto castToVPIRMetadata(RecipeBasePtrTy R) -> DstTy
Definition VPlan.h:4171
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:831
LLVM_PACKED_END
Definition VPlan.h:1076
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:841
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2655
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:336
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2653
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:78
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
Support casting from VPRecipeBase -> VPIRMetadata, by down-casting to the recipe types implementing V...
Definition VPlan.h:4202
static RetTy doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:4216
static RetTy doCastIfPossible(SrcTy R)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:4221
static bool isPossible(SrcTy R)
Definition VPlan.h:4203
Support casting from VPRecipeBase -> VPPhiAccessors, by down-casting to the recipe types implementing...
Definition VPlan.h:4133
static VPPhiAccessors * doCastIfPossible(SrcTy f)
doCastIfPossible is used by dyn_cast<>.
Definition VPlan.h:4154
CastInfo< VPPhiAccessors, SrcTy > Self
Definition VPlan.h:4135
static VPPhiAccessors * doCast(SrcTy R)
doCast is used by cast<>.
Definition VPlan.h:4138
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
static bool isPossible(const VPRecipeBase *f)
Definition VPlan.h:4125
This struct provides a way to check if a given cast is possible.
Definition Casting.h:253
static bool isPossible(const SrcTy &f)
Definition Casting.h:254
This reduction is in-loop.
Definition VPlan.h:2647
Possible variants of a reduction.
Definition VPlan.h:2645
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2650
unsigned VFScaleFactor
Definition VPlan.h:2651
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:201
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2616
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2628
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2607
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:704
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:709
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:699
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:692
PHINode & getIRPhi()
Definition VPlan.h:1733
VPIRPhi(PHINode &PN)
Definition VPlan.h:1721
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1723
static bool classof(const VPUser *U)
Definition VPlan.h:1728
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1744
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:183
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:141
static bool classof(const VPUser *U)
Definition VPlan.h:1620
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1616
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1635
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1650
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1630
static bool classof(const VPValue *V)
Definition VPlan.h:1625
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1080
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1113
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1086
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1081
static bool classof(const VPValue *V)
Definition VPlan.h:1106
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1101
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:223
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3629
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3642
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3630
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3652
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3587
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3609
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3588
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3597
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3714
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3726
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3715
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3739
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3729
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3670
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3689
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3680
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3695
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3671