LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class VPlanSlp;
69class Value;
71
72struct VPCostContext;
73
74namespace Intrinsic {
75typedef unsigned ID;
76}
77
78using VPlanPtr = std::unique_ptr<VPlan>;
79
80/// \enum UncountableExitStyle
81/// Different methods of handling early exits.
82///
85 /// No side effects to worry about, so we can process any uncountable exits
86 /// in the loop and branch either to the middle block if the trip count was
87 /// reached, or an early exit block to determine which exit was taken.
89 /// All memory operations other than the load(s) required to determine whether
90 /// an uncountable exit occurred will be masked based on that condition. If an
91 /// uncountable exit is taken, then all lanes before the exiting lane will
92 /// complete, leaving just the final lane to execute in the scalar tail.
94};
95
96/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
97/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
99 friend class VPBlockUtils;
100
101 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
102
103 /// An optional name for the block.
104 std::string Name;
105
106 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
107 /// it is a topmost VPBlockBase.
108 VPRegionBlock *Parent = nullptr;
109
110 /// List of predecessor blocks.
112
113 /// List of successor blocks.
115
116 /// VPlan containing the block. Can only be set on the entry block of the
117 /// plan.
118 VPlan *Plan = nullptr;
119
120 /// Add \p Successor as the last successor to this block.
121 void appendSuccessor(VPBlockBase *Successor) {
122 assert(Successor && "Cannot add nullptr successor!");
123 Successors.push_back(Successor);
124 }
125
126 /// Add \p Predecessor as the last predecessor to this block.
127 void appendPredecessor(VPBlockBase *Predecessor) {
128 assert(Predecessor && "Cannot add nullptr predecessor!");
129 Predecessors.push_back(Predecessor);
130 }
131
132 /// Remove \p Predecessor from the predecessors of this block.
133 void removePredecessor(VPBlockBase *Predecessor) {
134 auto Pos = find(Predecessors, Predecessor);
135 assert(Pos && "Predecessor does not exist");
136 Predecessors.erase(Pos);
137 }
138
139 /// Remove \p Successor from the successors of this block.
140 void removeSuccessor(VPBlockBase *Successor) {
141 auto Pos = find(Successors, Successor);
142 assert(Pos && "Successor does not exist");
143 Successors.erase(Pos);
144 }
145
146 /// This function replaces one predecessor with another, useful when
147 /// trying to replace an old block in the CFG with a new one.
148 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
149 auto I = find(Predecessors, Old);
150 assert(I != Predecessors.end());
151 assert(Old->getParent() == New->getParent() &&
152 "replaced predecessor must have the same parent");
153 *I = New;
154 }
155
156 /// This function replaces one successor with another, useful when
157 /// trying to replace an old block in the CFG with a new one.
158 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
159 auto I = find(Successors, Old);
160 assert(I != Successors.end());
161 assert(Old->getParent() == New->getParent() &&
162 "replaced successor must have the same parent");
163 *I = New;
164 }
165
166protected:
167 VPBlockBase(const unsigned char SC, const std::string &N)
168 : SubclassID(SC), Name(N) {}
169
170public:
171 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
172 /// that are actually instantiated. Values of this enumeration are kept in the
173 /// SubclassID field of the VPBlockBase objects. They are used for concrete
174 /// type identification.
175 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
176
178
179 virtual ~VPBlockBase() = default;
180
181 const std::string &getName() const { return Name; }
182
183 void setName(const Twine &newName) { Name = newName.str(); }
184
185 /// \return an ID for the concrete type of this object.
186 /// This is used to implement the classof checks. This should not be used
187 /// for any other purpose, as the values may change as LLVM evolves.
188 unsigned getVPBlockID() const { return SubclassID; }
189
190 VPRegionBlock *getParent() { return Parent; }
191 const VPRegionBlock *getParent() const { return Parent; }
192
193 /// \return A pointer to the plan containing the current block.
194 VPlan *getPlan();
195 const VPlan *getPlan() const;
196
197 /// Sets the pointer of the plan containing the block. The block must be the
198 /// entry block into the VPlan.
199 void setPlan(VPlan *ParentPlan);
200
201 void setParent(VPRegionBlock *P) { Parent = P; }
202
203 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
204 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
205 /// VPBlockBase is a VPBasicBlock, it is returned.
206 const VPBasicBlock *getEntryBasicBlock() const;
207 VPBasicBlock *getEntryBasicBlock();
208
209 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
210 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
211 /// VPBlockBase is a VPBasicBlock, it is returned.
212 const VPBasicBlock *getExitingBasicBlock() const;
213 VPBasicBlock *getExitingBasicBlock();
214
215 const VPBlocksTy &getSuccessors() const { return Successors; }
216 VPBlocksTy &getSuccessors() { return Successors; }
217
218 /// Returns true if this block has any successors.
219 bool hasSuccessors() const { return !Successors.empty(); }
220 /// Returns true if this block has any predecessors.
221 bool hasPredecessors() const { return !Predecessors.empty(); }
222
225
226 const VPBlocksTy &getPredecessors() const { return Predecessors; }
227 VPBlocksTy &getPredecessors() { return Predecessors; }
228
229 /// \return the successor of this VPBlockBase if it has a single successor.
230 /// Otherwise return a null pointer.
232 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
233 }
234
235 /// \return the predecessor of this VPBlockBase if it has a single
236 /// predecessor. Otherwise return a null pointer.
238 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
239 }
240
241 size_t getNumSuccessors() const { return Successors.size(); }
242 size_t getNumPredecessors() const { return Predecessors.size(); }
243
244 /// An Enclosing Block of a block B is any block containing B, including B
245 /// itself. \return the closest enclosing block starting from "this", which
246 /// has successors. \return the root enclosing block if all enclosing blocks
247 /// have no successors.
248 VPBlockBase *getEnclosingBlockWithSuccessors();
249
250 /// \return the closest enclosing block starting from "this", which has
251 /// predecessors. \return the root enclosing block if all enclosing blocks
252 /// have no predecessors.
253 VPBlockBase *getEnclosingBlockWithPredecessors();
254
255 /// \return the successors either attached directly to this VPBlockBase or, if
256 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
257 /// successors of its own, search recursively for the first enclosing
258 /// VPRegionBlock that has successors and return them. If no such
259 /// VPRegionBlock exists, return the (empty) successors of the topmost
260 /// VPBlockBase reached.
262 return getEnclosingBlockWithSuccessors()->getSuccessors();
263 }
264
265 /// \return the hierarchical successor of this VPBlockBase if it has a single
266 /// hierarchical successor. Otherwise return a null pointer.
268 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
269 }
270
271 /// \return the predecessors either attached directly to this VPBlockBase or,
272 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
273 /// predecessors of its own, search recursively for the first enclosing
274 /// VPRegionBlock that has predecessors and return them. If no such
275 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
276 /// VPBlockBase reached.
278 return getEnclosingBlockWithPredecessors()->getPredecessors();
279 }
280
281 /// \return the hierarchical predecessor of this VPBlockBase if it has a
282 /// single hierarchical predecessor. Otherwise return a null pointer.
286
287 /// Set a given VPBlockBase \p Successor as the single successor of this
288 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
289 /// This VPBlockBase must have no successors.
291 assert(Successors.empty() && "Setting one successor when others exist.");
292 assert(Successor->getParent() == getParent() &&
293 "connected blocks must have the same parent");
294 appendSuccessor(Successor);
295 }
296
297 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
298 /// successors of this VPBlockBase. This VPBlockBase is not added as
299 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
300 /// successors.
301 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
302 assert(Successors.empty() && "Setting two successors when others exist.");
303 appendSuccessor(IfTrue);
304 appendSuccessor(IfFalse);
305 }
306
307 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
308 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
309 /// as successor of any VPBasicBlock in \p NewPreds.
311 assert(Predecessors.empty() && "Block predecessors already set.");
312 for (auto *Pred : NewPreds)
313 appendPredecessor(Pred);
314 }
315
316 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
317 /// This VPBlockBase must have no successors. This VPBlockBase is not added
318 /// as predecessor of any VPBasicBlock in \p NewSuccs.
320 assert(Successors.empty() && "Block successors already set.");
321 for (auto *Succ : NewSuccs)
322 appendSuccessor(Succ);
323 }
324
325 /// Remove all the predecessors of this block.
326 void clearPredecessors() { Predecessors.clear(); }
327
328 /// Remove all the successors of this block.
329 void clearSuccessors() { Successors.clear(); }
330
331 /// Swap predecessors of the block. The block must have exactly 2
332 /// predecessors.
334 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
335 std::swap(Predecessors[0], Predecessors[1]);
336 }
337
338 /// Swap successors of the block. The block must have exactly 2 successors.
339 // TODO: This should be part of introducing conditional branch recipes rather
340 // than being independent.
342 assert(Successors.size() == 2 && "must have 2 successors to swap");
343 std::swap(Successors[0], Successors[1]);
344 }
345
346 /// Returns the index for \p Pred in the blocks predecessors list.
347 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
348 assert(count(Predecessors, Pred) == 1 &&
349 "must have Pred exactly once in Predecessors");
350 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
351 }
352
353 /// Returns the index for \p Succ in the blocks successor list.
354 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
355 assert(count(Successors, Succ) == 1 &&
356 "must have Succ exactly once in Successors");
357 return std::distance(Successors.begin(), find(Successors, Succ));
358 }
359
360 /// The method which generates the output IR that correspond to this
361 /// VPBlockBase, thereby "executing" the VPlan.
362 virtual void execute(VPTransformState *State) = 0;
363
364 /// Return the cost of the block.
366
367#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
368 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
369 OS << getName();
370 }
371
372 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
373 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
374 /// consecutive numbers.
375 ///
376 /// Note that the numbering is applied to the whole VPlan, so printing
377 /// individual blocks is consistent with the whole VPlan printing.
378 virtual void print(raw_ostream &O, const Twine &Indent,
379 VPSlotTracker &SlotTracker) const = 0;
380
381 /// Print plain-text dump of this VPBlockBase to \p O.
382 void print(raw_ostream &O) const;
383
384 /// Print the successors of this block to \p O, prefixing all lines with \p
385 /// Indent.
386 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
387
388 /// Dump this VPBlockBase to dbgs().
389 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
390#endif
391
392 /// Clone the current block and its recipes without updating the operands of
393 /// the cloned recipes, including all blocks in the single-entry single-exit
394 /// region for VPRegionBlocks.
395 virtual VPBlockBase *clone() = 0;
396};
397
398/// VPRecipeBase is a base class modeling a sequence of one or more output IR
399/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
400/// and is responsible for deleting its defined values. Single-value
401 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
402/// VPRecipeBase and VPValue separately.
404 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
405 public VPDef,
406 public VPUser {
407 friend VPBasicBlock;
408 friend class VPBlockUtils;
409
410 /// Subclass identifier (for isa/dyn_cast).
411 const unsigned char SubclassID;
412
413 /// Each VPRecipe belongs to a single VPBasicBlock.
414 VPBasicBlock *Parent = nullptr;
415
416 /// The debug location for the recipe.
417 DebugLoc DL;
418
419public:
420 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
421 /// that is actually instantiated. Values of this enumeration are kept in the
422 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
423 /// type identification.
424 using VPRecipeTy = enum {
425 VPBranchOnMaskSC,
426 VPDerivedIVSC,
427 VPExpandSCEVSC,
428 VPExpressionSC,
429 VPIRInstructionSC,
430 VPInstructionSC,
431 VPInterleaveEVLSC,
432 VPInterleaveSC,
433 VPReductionEVLSC,
434 VPReductionSC,
435 VPReplicateSC,
436 VPScalarIVStepsSC,
437 VPVectorPointerSC,
438 VPVectorEndPointerSC,
439 VPWidenCallSC,
440 VPWidenCanonicalIVSC,
441 VPWidenCastSC,
442 VPWidenGEPSC,
443 VPWidenIntrinsicSC,
444 VPWidenLoadEVLSC,
445 VPWidenLoadSC,
446 VPWidenStoreEVLSC,
447 VPWidenStoreSC,
448 VPWidenSC,
449 VPBlendSC,
450 VPHistogramSC,
451 // START: Phi-like recipes. Need to be kept together.
452 VPWidenPHISC,
453 VPPredInstPHISC,
454 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
455 // VPHeaderPHIRecipe need to be kept together.
456 VPCurrentIterationPHISC,
457 VPActiveLaneMaskPHISC,
458 VPFirstOrderRecurrencePHISC,
459 VPWidenIntOrFpInductionSC,
460 VPWidenPointerInductionSC,
461 VPReductionPHISC,
462 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
463 // END: Phi-like recipes
464 VPFirstPHISC = VPWidenPHISC,
465 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
466 VPLastHeaderPHISC = VPReductionPHISC,
467 VPLastPHISC = VPReductionPHISC,
468 };
469
470 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
472 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
473
474 ~VPRecipeBase() override = default;
475
476 /// Clone the current recipe.
477 virtual VPRecipeBase *clone() = 0;
478
479 /// \return the VPBasicBlock which this VPRecipe belongs to.
480 VPBasicBlock *getParent() { return Parent; }
481 const VPBasicBlock *getParent() const { return Parent; }
482
483 /// \return the VPRegionBlock which the recipe belongs to.
484 VPRegionBlock *getRegion();
485 const VPRegionBlock *getRegion() const;
486
487 /// The method which generates the output IR instructions that correspond to
488 /// this VPRecipe, thereby "executing" the VPlan.
489 virtual void execute(VPTransformState &State) = 0;
490
491 /// Return the cost of this recipe, taking into account if the cost
492 /// computation should be skipped and the ForceTargetInstructionCost flag.
493 /// Also takes care of printing the cost for debugging.
495
496 /// Insert an unlinked recipe into a basic block immediately before
497 /// the specified recipe.
498 void insertBefore(VPRecipeBase *InsertPos);
499 /// Insert an unlinked recipe into \p BB immediately before the insertion
500 /// point \p IP;
501 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
502
503 /// Insert an unlinked Recipe into a basic block immediately after
504 /// the specified Recipe.
505 void insertAfter(VPRecipeBase *InsertPos);
506
507 /// Unlink this recipe from its current VPBasicBlock and insert it into
508 /// the VPBasicBlock that MovePos lives in, right after MovePos.
509 void moveAfter(VPRecipeBase *MovePos);
510
511 /// Unlink this recipe and insert into BB before I.
512 ///
513 /// \pre I is a valid iterator into BB.
514 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
515
516 /// This method unlinks 'this' from the containing basic block, but does not
517 /// delete it.
518 void removeFromParent();
519
520 /// This method unlinks 'this' from the containing basic block and deletes it.
521 ///
522 /// \returns an iterator pointing to the element after the erased one
524
525 /// \return an ID for the concrete type of this object.
526 unsigned getVPRecipeID() const { return SubclassID; }
527
528 /// Method to support type inquiry through isa, cast, and dyn_cast.
529 static inline bool classof(const VPDef *D) {
530 // All VPDefs are also VPRecipeBases.
531 return true;
532 }
533
534 static inline bool classof(const VPUser *U) { return true; }
535
536 /// Returns true if the recipe may have side-effects.
537 bool mayHaveSideEffects() const;
538
539 /// Returns true for PHI-like recipes.
540 bool isPhi() const;
541
542 /// Returns true if the recipe may read from memory.
543 bool mayReadFromMemory() const;
544
545 /// Returns true if the recipe may write to memory.
546 bool mayWriteToMemory() const;
547
548 /// Returns true if the recipe may read from or write to memory.
549 bool mayReadOrWriteMemory() const {
551 }
552
553 /// Returns the debug location of the recipe.
554 DebugLoc getDebugLoc() const { return DL; }
555
556 /// Return true if the recipe is a scalar cast.
557 bool isScalarCast() const;
558
559 /// Set the recipe's debug location to \p NewDL.
560 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
561
562#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
563 /// Dump the recipe to stderr (for debugging).
564 LLVM_ABI_FOR_TEST void dump() const;
565
566 /// Print the recipe, delegating to printRecipe().
567 void print(raw_ostream &O, const Twine &Indent,
569#endif
570
571protected:
572 /// Compute the cost of this recipe either using a recipe's specialized
573 /// implementation or using the legacy cost model and the underlying
574 /// instructions.
575 virtual InstructionCost computeCost(ElementCount VF,
576 VPCostContext &Ctx) const;
577
578#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
579 /// Each concrete VPRecipe prints itself, without printing common information,
580 /// like debug info or metadata.
581 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
582 VPSlotTracker &SlotTracker) const = 0;
583#endif
584};
585
// Helper macro that expands to the four classof overloads a concrete recipe
// needs (for VPRecipeBase, VPValue, VPUser and VPSingleDefRecipe pointers).
// Each overload matches when the recipe's ID equals \p VPRecipeID; the VPValue
// and VPUser overloads yield false when no recipe can be obtained from the
// argument.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *DefR = V->getDefiningRecipe())                                   \
      return DefR->getVPRecipeID() == VPRecipeID;                              \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *UserR = dyn_cast<VPRecipeBase>(U))                               \
      return UserR->getVPRecipeID() == VPRecipeID;                             \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
602
603 /// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
604 /// more output IR instructions that define a single result VPValue.
605/// Note that VPRecipeBase must be inherited from before VPValue.
607public:
608 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
610 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
611
612 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
614 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
615
616 static inline bool classof(const VPRecipeBase *R) {
617 switch (R->getVPRecipeID()) {
618 case VPRecipeBase::VPDerivedIVSC:
619 case VPRecipeBase::VPExpandSCEVSC:
620 case VPRecipeBase::VPExpressionSC:
621 case VPRecipeBase::VPInstructionSC:
622 case VPRecipeBase::VPReductionEVLSC:
623 case VPRecipeBase::VPReductionSC:
624 case VPRecipeBase::VPReplicateSC:
625 case VPRecipeBase::VPScalarIVStepsSC:
626 case VPRecipeBase::VPVectorPointerSC:
627 case VPRecipeBase::VPVectorEndPointerSC:
628 case VPRecipeBase::VPWidenCallSC:
629 case VPRecipeBase::VPWidenCanonicalIVSC:
630 case VPRecipeBase::VPWidenCastSC:
631 case VPRecipeBase::VPWidenGEPSC:
632 case VPRecipeBase::VPWidenIntrinsicSC:
633 case VPRecipeBase::VPWidenSC:
634 case VPRecipeBase::VPBlendSC:
635 case VPRecipeBase::VPPredInstPHISC:
636 case VPRecipeBase::VPCurrentIterationPHISC:
637 case VPRecipeBase::VPActiveLaneMaskPHISC:
638 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
639 case VPRecipeBase::VPWidenPHISC:
640 case VPRecipeBase::VPWidenIntOrFpInductionSC:
641 case VPRecipeBase::VPWidenPointerInductionSC:
642 case VPRecipeBase::VPReductionPHISC:
643 return true;
644 case VPRecipeBase::VPBranchOnMaskSC:
645 case VPRecipeBase::VPInterleaveEVLSC:
646 case VPRecipeBase::VPInterleaveSC:
647 case VPRecipeBase::VPIRInstructionSC:
648 case VPRecipeBase::VPWidenLoadEVLSC:
649 case VPRecipeBase::VPWidenLoadSC:
650 case VPRecipeBase::VPWidenStoreEVLSC:
651 case VPRecipeBase::VPWidenStoreSC:
652 case VPRecipeBase::VPHistogramSC:
653 // TODO: Widened stores don't define a value, but widened loads do. Split
654 // the recipes to be able to make widened loads VPSingleDefRecipes.
655 return false;
656 }
657 llvm_unreachable("Unhandled VPRecipeID");
658 }
659
660 static inline bool classof(const VPValue *V) {
661 auto *R = V->getDefiningRecipe();
662 return R && classof(R);
663 }
664
665 static inline bool classof(const VPUser *U) {
666 auto *R = dyn_cast<VPRecipeBase>(U);
667 return R && classof(R);
668 }
669
670 VPSingleDefRecipe *clone() override = 0;
671
672 /// Returns the underlying instruction.
679
680#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
681 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
683#endif
684};
685
686/// Class to record and manage LLVM IR flags.
689 enum class OperationType : unsigned char {
690 Cmp,
691 FCmp,
692 OverflowingBinOp,
693 Trunc,
694 DisjointOp,
695 PossiblyExactOp,
696 GEPOp,
697 FPMathOp,
698 NonNegOp,
699 ReductionOp,
700 Other
701 };
702
703public:
704 struct WrapFlagsTy {
705 char HasNUW : 1;
706 char HasNSW : 1;
707
709 };
710
712 char HasNUW : 1;
713 char HasNSW : 1;
714
716 };
717
722
724 char NonNeg : 1;
725 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
726 };
727
728private:
729 struct ExactFlagsTy {
730 char IsExact : 1;
731 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
732 };
733 struct FastMathFlagsTy {
734 char AllowReassoc : 1;
735 char NoNaNs : 1;
736 char NoInfs : 1;
737 char NoSignedZeros : 1;
738 char AllowReciprocal : 1;
739 char AllowContract : 1;
740 char ApproxFunc : 1;
741
742 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
743 };
744 /// Holds both the predicate and fast-math flags for floating-point
745 /// comparisons.
746 struct FCmpFlagsTy {
747 uint8_t CmpPredStorage;
748 FastMathFlagsTy FMFs;
749 };
750 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
751 struct ReductionFlagsTy {
752 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
753 // additional kinds.
754 unsigned char Kind : 6;
755 // TODO: Derive order/in-loop from plan and remove here.
756 unsigned char IsOrdered : 1;
757 unsigned char IsInLoop : 1;
758 FastMathFlagsTy FMFs;
759
760 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
761 FastMathFlags FMFs)
762 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
763 IsInLoop(IsInLoop), FMFs(FMFs) {}
764 };
765
766 OperationType OpType;
767
768 union {
773 ExactFlagsTy ExactFlags;
776 FastMathFlagsTy FMFs;
777 FCmpFlagsTy FCmpFlags;
778 ReductionFlagsTy ReductionFlags;
780 };
781
782public:
783 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
784
786 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
787 OpType = OperationType::FCmp;
789 FCmp->getPredicate());
790 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
791 FCmpFlags.FMFs = FCmp->getFastMathFlags();
792 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
793 OpType = OperationType::Cmp;
795 Op->getPredicate());
796 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
797 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
798 OpType = OperationType::DisjointOp;
799 DisjointFlags.IsDisjoint = Op->isDisjoint();
800 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
801 OpType = OperationType::OverflowingBinOp;
802 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
803 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
804 OpType = OperationType::Trunc;
805 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
806 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
807 OpType = OperationType::PossiblyExactOp;
808 ExactFlags.IsExact = Op->isExact();
809 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
810 OpType = OperationType::GEPOp;
811 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
812 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
813 "wrap flags truncated");
814 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
815 OpType = OperationType::NonNegOp;
816 NonNegFlags.NonNeg = PNNI->hasNonNeg();
817 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
818 OpType = OperationType::FPMathOp;
819 FMFs = Op->getFastMathFlags();
820 }
821 }
822
823 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
825 assert(getPredicate() == Pred && "predicate truncated");
826 }
827
829 : OpType(OperationType::FCmp), AllFlags() {
831 assert(getPredicate() == Pred && "predicate truncated");
832 FCmpFlags.FMFs = FMFs;
833 }
834
836 : OpType(OperationType::OverflowingBinOp), AllFlags() {
837 this->WrapFlags = WrapFlags;
838 }
839
841 : OpType(OperationType::Trunc), AllFlags() {
842 this->TruncFlags = TruncFlags;
843 }
844
845 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
846 this->FMFs = FMFs;
847 }
848
850 : OpType(OperationType::DisjointOp), AllFlags() {
851 this->DisjointFlags = DisjointFlags;
852 }
853
855 : OpType(OperationType::NonNegOp), AllFlags() {
856 this->NonNegFlags = NonNegFlags;
857 }
858
859 VPIRFlags(ExactFlagsTy ExactFlags)
860 : OpType(OperationType::PossiblyExactOp), AllFlags() {
861 this->ExactFlags = ExactFlags;
862 }
863
865 : OpType(OperationType::GEPOp), AllFlags() {
866 GEPFlagsStorage = GEPFlags.getRaw();
867 }
868
869 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
870 : OpType(OperationType::ReductionOp), AllFlags() {
871 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
872 }
873
875 OpType = Other.OpType;
876 AllFlags[0] = Other.AllFlags[0];
877 AllFlags[1] = Other.AllFlags[1];
878 }
879
880 /// Only keep flags also present in \p Other. \p Other must have the same
881 /// OpType as the current object.
882 void intersectFlags(const VPIRFlags &Other);
883
884 /// Drop all poison-generating flags.
886 // NOTE: This needs to be kept in-sync with
887 // Instruction::dropPoisonGeneratingFlags.
888 switch (OpType) {
889 case OperationType::OverflowingBinOp:
890 WrapFlags.HasNUW = false;
891 WrapFlags.HasNSW = false;
892 break;
893 case OperationType::Trunc:
894 TruncFlags.HasNUW = false;
895 TruncFlags.HasNSW = false;
896 break;
897 case OperationType::DisjointOp:
898 DisjointFlags.IsDisjoint = false;
899 break;
900 case OperationType::PossiblyExactOp:
901 ExactFlags.IsExact = false;
902 break;
903 case OperationType::GEPOp:
904 GEPFlagsStorage = 0;
905 break;
906 case OperationType::FPMathOp:
907 case OperationType::FCmp:
908 case OperationType::ReductionOp:
909 getFMFsRef().NoNaNs = false;
910 getFMFsRef().NoInfs = false;
911 break;
912 case OperationType::NonNegOp:
913 NonNegFlags.NonNeg = false;
914 break;
915 case OperationType::Cmp:
916 case OperationType::Other:
917 break;
918 }
919 }
920
921 /// Apply the IR flags to \p I.
922 void applyFlags(Instruction &I) const {
923 switch (OpType) {
924 case OperationType::OverflowingBinOp:
925 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
926 I.setHasNoSignedWrap(WrapFlags.HasNSW);
927 break;
928 case OperationType::Trunc:
929 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
930 I.setHasNoSignedWrap(TruncFlags.HasNSW);
931 break;
932 case OperationType::DisjointOp:
933 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
934 break;
935 case OperationType::PossiblyExactOp:
936 I.setIsExact(ExactFlags.IsExact);
937 break;
938 case OperationType::GEPOp:
939 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
941 break;
942 case OperationType::FPMathOp:
943 case OperationType::FCmp: {
944 const FastMathFlagsTy &F = getFMFsRef();
945 I.setHasAllowReassoc(F.AllowReassoc);
946 I.setHasNoNaNs(F.NoNaNs);
947 I.setHasNoInfs(F.NoInfs);
948 I.setHasNoSignedZeros(F.NoSignedZeros);
949 I.setHasAllowReciprocal(F.AllowReciprocal);
950 I.setHasAllowContract(F.AllowContract);
951 I.setHasApproxFunc(F.ApproxFunc);
952 break;
953 }
954 case OperationType::NonNegOp:
955 I.setNonNeg(NonNegFlags.NonNeg);
956 break;
957 case OperationType::ReductionOp:
958 llvm_unreachable("reduction ops should not use applyFlags");
959 case OperationType::Cmp:
960 case OperationType::Other:
961 break;
962 }
963 }
964
966 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
967 "recipe doesn't have a compare predicate");
968 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
971 }
972
974 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
975 "recipe doesn't have a compare predicate");
976 if (OpType == OperationType::FCmp)
978 else
980 assert(getPredicate() == Pred && "predicate truncated");
981 }
982
986
987 /// Returns true if the recipe has a comparison predicate.
988 bool hasPredicate() const {
989 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
990 }
991
992 /// Returns true if the recipe has fast-math flags.
993 bool hasFastMathFlags() const {
994 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
995 OpType == OperationType::ReductionOp;
996 }
997
999
1000 /// Returns true if the recipe has non-negative flag.
1001 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1002
1003 bool isNonNeg() const {
1004 assert(OpType == OperationType::NonNegOp &&
1005 "recipe doesn't have a NNEG flag");
1006 return NonNegFlags.NonNeg;
1007 }
1008
1009 bool hasNoUnsignedWrap() const {
1010 switch (OpType) {
1011 case OperationType::OverflowingBinOp:
1012 return WrapFlags.HasNUW;
1013 case OperationType::Trunc:
1014 return TruncFlags.HasNUW;
1015 default:
1016 llvm_unreachable("recipe doesn't have a NUW flag");
1017 }
1018 }
1019
1020 bool hasNoSignedWrap() const {
1021 switch (OpType) {
1022 case OperationType::OverflowingBinOp:
1023 return WrapFlags.HasNSW;
1024 case OperationType::Trunc:
1025 return TruncFlags.HasNSW;
1026 default:
1027 llvm_unreachable("recipe doesn't have a NSW flag");
1028 }
1029 }
1030
1031 bool hasNoWrapFlags() const {
1032 switch (OpType) {
1033 case OperationType::OverflowingBinOp:
1034 case OperationType::Trunc:
1035 return true;
1036 default:
1037 return false;
1038 }
1039 }
1040
1042 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1043 }
1044
1045 bool isDisjoint() const {
1046 assert(OpType == OperationType::DisjointOp &&
1047 "recipe cannot have a disjoing flag");
1048 return DisjointFlags.IsDisjoint;
1049 }
1050
1052 assert(OpType == OperationType::ReductionOp &&
1053 "recipe doesn't have reduction flags");
1054 return static_cast<RecurKind>(ReductionFlags.Kind);
1055 }
1056
1057 bool isReductionOrdered() const {
1058 assert(OpType == OperationType::ReductionOp &&
1059 "recipe doesn't have reduction flags");
1060 return ReductionFlags.IsOrdered;
1061 }
1062
1063 bool isReductionInLoop() const {
1064 assert(OpType == OperationType::ReductionOp &&
1065 "recipe doesn't have reduction flags");
1066 return ReductionFlags.IsInLoop;
1067 }
1068
1069private:
1070 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1071 FastMathFlagsTy &getFMFsRef() {
1072 if (OpType == OperationType::FCmp)
1073 return FCmpFlags.FMFs;
1074 if (OpType == OperationType::ReductionOp)
1075 return ReductionFlags.FMFs;
1076 return FMFs;
1077 }
1078 const FastMathFlagsTy &getFMFsRef() const {
1079 if (OpType == OperationType::FCmp)
1080 return FCmpFlags.FMFs;
1081 if (OpType == OperationType::ReductionOp)
1082 return ReductionFlags.FMFs;
1083 return FMFs;
1084 }
1085
1086public:
1087 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1088 /// otherwise. Opcodes not supporting default flags include compares and
1089 /// ComputeReductionResult.
1090 static VPIRFlags getDefaultFlags(unsigned Opcode);
1091
1092#if !defined(NDEBUG)
1093 /// Returns true if the set flags are valid for \p Opcode.
1094 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1095
1096 /// Returns true if \p Opcode has its required flags set.
1097 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1098#endif
1099
1100#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1101 void printFlags(raw_ostream &O) const;
1102#endif
1103};
1105
1106static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1107
1108/// A pure-virtual common base class for recipes defining a single VPValue and
1109/// using IR flags.
1111 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1112 const VPIRFlags &Flags,
1114 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1115
1116 static inline bool classof(const VPRecipeBase *R) {
1117 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1118 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1119 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1120 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1121 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1122 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1123 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1124 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1125 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1126 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1127 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1128 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1129 }
1130
1131 static inline bool classof(const VPUser *U) {
1132 auto *R = dyn_cast<VPRecipeBase>(U);
1133 return R && classof(R);
1134 }
1135
1136 static inline bool classof(const VPValue *V) {
1137 auto *R = V->getDefiningRecipe();
1138 return R && classof(R);
1139 }
1140
1142
1143 static inline bool classof(const VPSingleDefRecipe *R) {
1144 return classof(static_cast<const VPRecipeBase *>(R));
1145 }
1146
1147 void execute(VPTransformState &State) override = 0;
1148
1149 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1151 VPCostContext &Ctx) const;
1152};
1153
1154/// Helper to access the operand that contains the unroll part for this recipe
1155/// after unrolling.
1156template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
1157protected:
1158 /// Return the VPValue operand containing the unroll part or null if there is
1159 /// no such operand.
1160 VPValue *getUnrollPartOperand(const VPUser &U) const;
1161
1162 /// Return the unroll part.
1163 unsigned getUnrollPart(const VPUser &U) const;
1164};
1165
1166/// Helper to manage IR metadata for recipes. It filters out metadata that
1167/// cannot be propagated.
1170
1171public:
1172 VPIRMetadata() = default;
1173
1174 /// Adds metadata that can be preserved from the original instruction
1175 /// \p I.
1177
1178 /// Copy constructor for cloning.
1180
1182
1183 /// Add all metadata to \p I.
1184 void applyMetadata(Instruction &I) const;
1185
1186 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1187 /// already exists, it will be replaced. Otherwise, it will be added.
1188 void setMetadata(unsigned Kind, MDNode *Node) {
1189 auto It =
1190 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1191 return P.first == Kind;
1192 });
1193 if (It != Metadata.end())
1194 It->second = Node;
1195 else
1196 Metadata.emplace_back(Kind, Node);
1197 }
1198
1199 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1200 /// nodes that are common to both.
1201 void intersect(const VPIRMetadata &MD);
1202
1203 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1204 MDNode *getMetadata(unsigned Kind) const {
1205 auto It =
1206 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1207 return It != Metadata.end() ? It->second : nullptr;
1208 }
1209
1210#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1211 /// Print metadata with node IDs.
1212 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1213#endif
1214};
1215
1216/// This is a concrete Recipe that models a single VPlan-level instruction.
1217/// While as any Recipe it may generate a sequence of IR instructions when
1218/// executed, these instructions would always form a single-def expression as
1219/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1220/// opcodes can take an optional mask. Masks may be assigned during
1221/// predication.
1223 public VPIRMetadata {
1224 friend class VPlanSlp;
1225
1226public:
1227 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1228 enum {
1230 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1231 // values of a first-order recurrence.
1235 // Creates a mask where each lane is active (true) whilst the current
1236 // counter (first operand + index) is less than the second operand. i.e.
1237 // mask[i] = icmp ult (op0 + i), op1
1238 // The size of the mask returned is VF * Multiplier (UF, third op).
1242 // Increment the canonical IV separately for each unrolled part.
1244 // Abstract instruction that compares two values and branches. This is
1245 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1248 // Branch with 2 boolean condition operands and 3 successors. If condition
1249 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1250 // successor 1; otherwise branches to successor 2. Expanded after region
1251 // dissolution into: (1) an OR of the two conditions branching to
1252 // middle.split or successor 2, and (2) middle.split branching to successor
1253 // 0 or successor 1 based on condition 0.
1256 /// Given operands of (the same) struct type, creates a struct of fixed-
1257 /// width vectors each containing a struct field of all operands. The
1258 /// number of operands matches the element count of every vector.
1260 /// Creates a fixed-width vector containing all operands. The number of
1261 /// operands matches the vector element count.
1263 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1264 /// abstract VPInstruction whose single defined VPValue represents VF
1265 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1266 /// VPInstructions.
1268 /// Reduce the operands to the final reduction result using the operation
1269 /// specified via the operation's VPIRFlags.
1271 // Extracts the last part of its operand. Removed during unrolling.
1273 // Extracts the last lane of its vector operand, per part.
1275 // Extracts the second-to-last lane from its operand or the second-to-last
1276 // part if it is scalar. In the latter case, the recipe will be removed
1277 // during unrolling.
1279 LogicalAnd, // Non-poison propagating logical And.
1280 LogicalOr, // Non-poison propagating logical Or.
1281 // Add an offset in bytes (second operand) to a base pointer (first
1282 // operand). Only generates scalar values (either for the first lane only or
1283 // for all lanes, depending on its uses).
1285 // Add a vector offset in bytes (second operand) to a scalar base pointer
1286 // (first operand).
1288 // Returns a scalar boolean value, which is true if any lane of its
1289 // (boolean) vector operands is true. It produces the reduced value across
1290 // all unrolled iterations. Unrolling will add all copies of its original
1291 // operand as additional operands. AnyOf is poison-safe as all operands
1292 // will be frozen.
1294 // Calculates the first active lane index of the vector predicate operands.
1295 // It produces the lane index across all unrolled iterations. Unrolling will
1296 // add all copies of its original operand as additional operands.
1297 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1298 // result even with operands that are all zeroes.
1300 // Calculates the last active lane index of the vector predicate operands.
1301 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1302 // tail-folding to extract the correct live-out value from the last active
1303 // iteration. It produces the lane index across all unrolled iterations.
1304 // Unrolling will add all copies of its original operand as additional
1305 // operands.
1307 // Returns a reversed vector for the operand.
1309
1310 // The opcodes below are used for VPInstructionWithType.
1311 //
1312 /// Scale the first operand (vector step) by the second operand
1313 /// (scalar-step). Casts both operands to the result type if needed.
1315 /// Start vector for reductions with 3 operands: the original start value,
1316 /// the identity value for the reduction and an integer indicating the
1317 /// scaling factor.
1319 // Creates a step vector starting from 0 to VF with a step of 1.
1321 /// Extracts a single lane (first operand) from a set of vector operands.
1322 /// The lane specifies an index into a vector formed by combining all vector
1323 /// operands (all operands after the first one).
1325 /// Explicit user for the resume phi of the canonical induction in the main
1326 /// VPlan, used by the epilogue vector loop.
1328 /// Extracts the last active lane from a set of vectors. The first operand
1329 /// is the default value if no lanes in the masks are active. Conceptually,
1330 /// this concatenates all data vectors (odd operands), concatenates all
1331 /// masks (even operands -- ignoring the default value), and returns the
1332 /// last active value from the combined data vector using the combined mask.
1334
1335 /// Returns the value for vscale.
1337 /// Compute the exiting value of a wide induction after vectorization, that
1338 /// is the value of the last lane of the induction increment (i.e. its
1339 /// backedge value). Has the wide induction recipe as operand.
1343 };
1344
1345 /// Returns true if this VPInstruction generates scalar values for all lanes.
1346 /// Most VPInstructions generate a single value per part, either vector or
1347 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1348 /// values per all lanes, stemming from an original ingredient. This method
1349 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1350 /// underlying ingredient.
1351 bool doesGeneratePerAllLanes() const;
1352
1353 /// Return the number of operands determined by the opcode of the
1354 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1355 /// cannot be determined directly by the opcode.
1356 unsigned getNumOperandsForOpcode() const;
1357
1358private:
1359 typedef unsigned char OpcodeTy;
1360 OpcodeTy Opcode;
1361
1362 /// An optional name that can be used for the generated IR instruction.
1363 std::string Name;
1364
1365 /// Returns true if we can generate a scalar for the first lane only if
1366 /// needed.
1367 bool canGenerateScalarForFirstLane() const;
1368
1369 /// Utility methods serving execute(): generates a single vector instance of
1370 /// the modeled instruction. \returns the generated value. In some cases an
1371 /// existing value is returned rather than a generated one.
1372 Value *generate(VPTransformState &State);
1373
1374 /// Returns true if the VPInstruction does not need masking.
1375 bool alwaysUnmasked() const {
1376 if (Opcode == VPInstruction::MaskedCond)
1377 return false;
1378
1379 // For now only VPInstructions with underlying values use masks.
1380 // TODO: provide masks to VPInstructions w/o underlying values.
1381 if (!getUnderlyingValue())
1382 return true;
1383
1384 return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
1385 }
1386
1387public:
1388 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1389 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1390 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1391
1392 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1393
1394 VPInstruction *clone() override {
1395 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1396 getDebugLoc(), Name);
1397 if (getUnderlyingValue())
1398 New->setUnderlyingValue(getUnderlyingInstr());
1399 return New;
1400 }
1401
1402 unsigned getOpcode() const { return Opcode; }
1403
1404 /// Generate the instruction.
1405 /// TODO: We currently execute only per-part unless a specific instance is
1406 /// provided.
1407 void execute(VPTransformState &State) override;
1408
1409 /// Return the cost of this VPInstruction.
1410 InstructionCost computeCost(ElementCount VF,
1411 VPCostContext &Ctx) const override;
1412
1413#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1414 /// Print the VPInstruction to dbgs() (for debugging).
1415 LLVM_DUMP_METHOD void dump() const;
1416#endif
1417
1418 bool hasResult() const {
1419 // CallInst may or may not have a result, depending on the called function.
1420 // Conservatively return calls have results for now.
1421 switch (getOpcode()) {
1422 case Instruction::Ret:
1423 case Instruction::UncondBr:
1424 case Instruction::CondBr:
1425 case Instruction::Store:
1426 case Instruction::Switch:
1427 case Instruction::IndirectBr:
1428 case Instruction::Resume:
1429 case Instruction::CatchRet:
1430 case Instruction::Unreachable:
1431 case Instruction::Fence:
1432 case Instruction::AtomicRMW:
1436 return false;
1437 default:
1438 return true;
1439 }
1440 }
1441
1442 /// Returns true if the VPInstruction has a mask operand.
1443 bool isMasked() const {
1444 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1445 // VPInstructions without a fixed number of operands cannot be masked.
1446 if (NumOpsForOpcode == -1u)
1447 return false;
1448 return NumOpsForOpcode + 1 == getNumOperands();
1449 }
1450
1451 /// Returns the number of operands, excluding the mask if the VPInstruction is
1452 /// masked.
1453 unsigned getNumOperandsWithoutMask() const {
1454 return getNumOperands() - isMasked();
1455 }
1456
1457 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1458 void addMask(VPValue *Mask) {
1459 assert(!isMasked() && "recipe is already masked");
1460 if (alwaysUnmasked())
1461 return;
1462 addOperand(Mask);
1463 }
1464
1465 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1466 /// VPInstructions.
1467 VPValue *getMask() const {
1468 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1469 }
1470
1471 /// Returns an iterator range over the operands excluding the mask operand
1472 /// if present.
1479
1480 /// Returns true if the underlying opcode may read from or write to memory.
1481 bool opcodeMayReadOrWriteFromMemory() const;
1482
1483 /// Returns true if the recipe only uses the first lane of operand \p Op.
1484 bool usesFirstLaneOnly(const VPValue *Op) const override;
1485
1486 /// Returns true if the recipe only uses the first part of operand \p Op.
1487 bool usesFirstPartOnly(const VPValue *Op) const override;
1488
1489 /// Returns true if this VPInstruction produces a scalar value from a vector,
1490 /// e.g. by performing a reduction or extracting a lane.
1491 bool isVectorToScalar() const;
1492
1493 /// Returns true if this VPInstruction's operands are single scalars and the
1494 /// result is also a single scalar.
1495 bool isSingleScalar() const;
1496
1497 /// Returns the symbolic name assigned to the VPInstruction.
1498 StringRef getName() const { return Name; }
1499
1500 /// Set the symbolic name for the VPInstruction.
1501 void setName(StringRef NewName) { Name = NewName.str(); }
1502
1503protected:
1504#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1505 /// Print the VPInstruction to \p O.
1506 void printRecipe(raw_ostream &O, const Twine &Indent,
1507 VPSlotTracker &SlotTracker) const override;
1508#endif
1509};
1510
1511/// A specialization of VPInstruction augmenting it with a dedicated result
1512/// type, to be used when the opcode and operands of the VPInstruction don't
1513/// directly determine the result type. Note that there is no separate recipe ID
1514/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1515/// distinguished purely by the opcode.
1517 /// Scalar result type produced by the recipe.
1518 Type *ResultTy;
1519
1520public:
1522 Type *ResultTy, const VPIRFlags &Flags = {},
1523 const VPIRMetadata &Metadata = {},
1525 const Twine &Name = "")
1526 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1527 ResultTy(ResultTy) {}
1528
1529 static inline bool classof(const VPRecipeBase *R) {
1530 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1531 // type information.
1532 if (R->isScalarCast())
1533 return true;
1534 auto *VPI = dyn_cast<VPInstruction>(R);
1535 if (!VPI)
1536 return false;
1537 switch (VPI->getOpcode()) {
1541 case Instruction::Load:
1542 return true;
1543 default:
1544 return false;
1545 }
1546 }
1547
1548 static inline bool classof(const VPUser *R) {
1550 }
1551
1552 VPInstruction *clone() override {
1553 auto *New =
1555 *this, *this, getDebugLoc(), getName());
1556 New->setUnderlyingValue(getUnderlyingValue());
1557 return New;
1558 }
1559
1560 void execute(VPTransformState &State) override;
1561
1562 /// Return the cost of this VPInstruction.
1564 VPCostContext &Ctx) const override {
1565 // TODO: Compute accurate cost after retiring the legacy cost model.
1566 return 0;
1567 }
1568
1569 Type *getResultType() const { return ResultTy; }
1570
1571protected:
1572#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1573 /// Print the recipe.
1574 void printRecipe(raw_ostream &O, const Twine &Indent,
1575 VPSlotTracker &SlotTracker) const override;
1576#endif
1577};
1578
1579/// Helper type to provide functions to access incoming values and blocks for
1580/// phi-like recipes.
1582protected:
1583 /// Return a VPRecipeBase* to the current object.
1584 virtual const VPRecipeBase *getAsRecipe() const = 0;
1585
1586public:
1587 virtual ~VPPhiAccessors() = default;
1588
1589 /// Returns the incoming VPValue with index \p Idx.
1590 VPValue *getIncomingValue(unsigned Idx) const {
1591 return getAsRecipe()->getOperand(Idx);
1592 }
1593
1594 /// Returns the incoming block with index \p Idx.
1595 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1596
1597 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1598 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1599
1600 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1601 /// block.
1602 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1603
1604 /// Returns the number of incoming values, also number of incoming blocks.
1605 virtual unsigned getNumIncoming() const {
1606 return getAsRecipe()->getNumOperands();
1607 }
1608
1609 /// Returns an iterator range over the incoming values.
1611 return make_range(getAsRecipe()->op_begin(),
1612 getAsRecipe()->op_begin() + getNumIncoming());
1613 }
1614
1616 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1617
1618 /// Returns an iterator range over the incoming blocks.
1620 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1621 return getIncomingBlock(Idx);
1622 };
1623 return map_range(index_range(0, getNumIncoming()), GetBlock);
1624 }
1625
1626 /// Returns an iterator range over pairs of incoming values and corresponding
1627 /// incoming blocks.
1633
1634 /// Removes the incoming value for \p IncomingBlock, which must be a
1635 /// predecessor.
1636 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1637
1638#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1639 /// Print the recipe.
1641#endif
1642};
1643
1646 const Twine &Name = "")
1647 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1648
1649 static inline bool classof(const VPUser *U) {
1650 auto *VPI = dyn_cast<VPInstruction>(U);
1651 return VPI && VPI->getOpcode() == Instruction::PHI;
1652 }
1653
1654 static inline bool classof(const VPValue *V) {
1655 auto *VPI = dyn_cast<VPInstruction>(V);
1656 return VPI && VPI->getOpcode() == Instruction::PHI;
1657 }
1658
1659 static inline bool classof(const VPSingleDefRecipe *SDR) {
1660 auto *VPI = dyn_cast<VPInstruction>(SDR);
1661 return VPI && VPI->getOpcode() == Instruction::PHI;
1662 }
1663
1664 VPPhi *clone() override {
1665 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1666 PhiR->setUnderlyingValue(getUnderlyingValue());
1667 return PhiR;
1668 }
1669
1670 void execute(VPTransformState &State) override;
1671
1672protected:
1673#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1674 /// Print the recipe.
1675 void printRecipe(raw_ostream &O, const Twine &Indent,
1676 VPSlotTracker &SlotTracker) const override;
1677#endif
1678
1679 const VPRecipeBase *getAsRecipe() const override { return this; }
1680};
1681
1682 /// A recipe to wrap an original IR instruction not to be modified during
1683 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1684 /// Except for PHIs, VPIRInstructions cannot have any operands.
1686 Instruction &I;
1687
1688protected:
1689 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1690 /// subclasses may need to be created, e.g. VPIRPhi.
1692 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1693
1694public:
1695 ~VPIRInstruction() override = default;
1696
1697 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1698 /// VPIRInstruction.
1700
1701 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1702
1704 auto *R = create(I);
1705 for (auto *Op : operands())
1706 R->addOperand(Op);
1707 return R;
1708 }
1709
1710 void execute(VPTransformState &State) override;
1711
1712 /// Return the cost of this VPIRInstruction.
1714 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1715
1716 Instruction &getInstruction() const { return I; }
1717
1718 bool usesScalars(const VPValue *Op) const override {
1720 "Op must be an operand of the recipe");
1721 return true;
1722 }
1723
1724 bool usesFirstPartOnly(const VPValue *Op) const override {
1726 "Op must be an operand of the recipe");
1727 return true;
1728 }
1729
1730 bool usesFirstLaneOnly(const VPValue *Op) const override {
1732 "Op must be an operand of the recipe");
1733 return true;
1734 }
1735
1736protected:
1737#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1738 /// Print the recipe.
1739 void printRecipe(raw_ostream &O, const Twine &Indent,
1740 VPSlotTracker &SlotTracker) const override;
1741#endif
1742};
1743
1744 /// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1745 /// of cast/dyn_cast/isa and execute() implementation. A single VPValue operand
1746 /// is allowed, and it is used to add a new incoming value for the single
1747 /// predecessor VPBB.
1749 public VPPhiAccessors {
1751
1752 static inline bool classof(const VPRecipeBase *U) {
1753 auto *R = dyn_cast<VPIRInstruction>(U);
1754 return R && isa<PHINode>(R->getInstruction());
1755 }
1756
1757 static inline bool classof(const VPUser *U) {
1758 auto *R = dyn_cast<VPRecipeBase>(U);
1759 return R && classof(R);
1760 }
1761
1763
1764 void execute(VPTransformState &State) override;
1765
1766protected:
1767#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1768 /// Print the recipe.
1769 void printRecipe(raw_ostream &O, const Twine &Indent,
1770 VPSlotTracker &SlotTracker) const override;
1771#endif
1772
1773 const VPRecipeBase *getAsRecipe() const override { return this; }
1774};
1775
1776/// VPWidenRecipe is a recipe for producing a widened instruction using the
1777/// opcode and operands of the recipe. This recipe covers most of the
1778/// traditional vectorization cases where each recipe transforms into a
1779/// vectorized version of itself.
1781 public VPIRMetadata {
1782 unsigned Opcode;
1783
1784public:
1786 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1787 DebugLoc DL = {})
1788 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1789 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1790 setUnderlyingValue(&I);
1791 }
1792
1793 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1794 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1795 DebugLoc DL = {})
1796 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1797 VPIRMetadata(Metadata), Opcode(Opcode) {}
1798
1799 ~VPWidenRecipe() override = default;
1800
1801 VPWidenRecipe *clone() override {
1802 if (auto *UV = getUnderlyingValue())
1803 return new VPWidenRecipe(*cast<Instruction>(UV), operands(), *this, *this,
1804 getDebugLoc());
1805 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1806 }
1807
1808 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1809
1810 /// Produce a widened instruction using the opcode and operands of the recipe,
1811 /// processing State.VF elements.
1812 void execute(VPTransformState &State) override;
1813
1814 /// Return the cost of this VPWidenRecipe.
1815 InstructionCost computeCost(ElementCount VF,
1816 VPCostContext &Ctx) const override;
1817
1818 unsigned getOpcode() const { return Opcode; }
1819
1820protected:
1821#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1822 /// Print the recipe.
1823 void printRecipe(raw_ostream &O, const Twine &Indent,
1824 VPSlotTracker &SlotTracker) const override;
1825#endif
1826
1827 /// Returns true if the recipe only uses the first lane of operand \p Op.
1828 bool usesFirstLaneOnly(const VPValue *Op) const override {
1830 "Op must be an operand of the recipe");
1831 return Opcode == Instruction::Select && Op == getOperand(0) &&
1832 Op->isDefinedOutsideLoopRegions();
1833 }
1834};
1835
1836/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1838 /// Cast instruction opcode.
1839 Instruction::CastOps Opcode;
1840
1841 /// Result type for the cast.
1842 Type *ResultTy;
1843
1844public:
1846 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1847 const VPIRMetadata &Metadata = {},
1849 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1850 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1851 assert(flagsValidForOpcode(Opcode) &&
1852 "Set flags not supported for the provided opcode");
1854 "Opcode requires specific flags to be set");
1856 }
1857
1858 ~VPWidenCastRecipe() override = default;
1859
1861 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1863 *this, *this, getDebugLoc());
1864 }
1865
1866 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1867
1868 /// Produce widened copies of the cast.
1869 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1870
1871 /// Return the cost of this VPWidenCastRecipe.
1873 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1874
1875 Instruction::CastOps getOpcode() const { return Opcode; }
1876
1877 /// Returns the result type of the cast.
1878 Type *getResultType() const { return ResultTy; }
1879
1880protected:
1881#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1882 /// Print the recipe.
1883 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1884 VPSlotTracker &SlotTracker) const override;
1885#endif
1886};
1887
1888/// A recipe for widening vector intrinsics.
1890 /// ID of the vector intrinsic to widen.
1891 Intrinsic::ID VectorIntrinsicID;
1892
1893 /// Scalar return type of the intrinsic.
1894 Type *ResultTy;
1895
1896 /// True if the intrinsic may read from memory.
1897 bool MayReadFromMemory;
1898
1899 /// True if the intrinsic may write to memory.
1900 bool MayWriteToMemory;
1901
1902 /// True if the intrinsic may have side-effects.
1903 bool MayHaveSideEffects;
1904
1905public:
1907 ArrayRef<VPValue *> CallArguments, Type *Ty,
1908 const VPIRFlags &Flags = {},
1909 const VPIRMetadata &MD = {},
1911 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1912 Flags, DL),
1913 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1914 MayReadFromMemory(CI.mayReadFromMemory()),
1915 MayWriteToMemory(CI.mayWriteToMemory()),
1916 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1917 setUnderlyingValue(&CI);
1918 }
1919
1921 ArrayRef<VPValue *> CallArguments, Type *Ty,
1922 const VPIRFlags &Flags = {},
1923 const VPIRMetadata &Metadata = {},
1925 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1926 Flags, DL),
1927 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1928 ResultTy(Ty) {
1929 LLVMContext &Ctx = Ty->getContext();
1930 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1931 MemoryEffects ME = Attrs.getMemoryEffects();
1932 MayReadFromMemory = !ME.onlyWritesMemory();
1933 MayWriteToMemory = !ME.onlyReadsMemory();
1934 MayHaveSideEffects = MayWriteToMemory ||
1935 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1936 !Attrs.hasAttribute(Attribute::WillReturn);
1937 }
1938
1939 ~VPWidenIntrinsicRecipe() override = default;
1940
1942 if (Value *CI = getUnderlyingValue())
1943 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1944 operands(), ResultTy, *this, *this,
1945 getDebugLoc());
1946 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1947 *this, *this, getDebugLoc());
1948 }
1949
1950 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1951
1952 /// Produce a widened version of the vector intrinsic.
1953 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1954
1955 /// Return the cost of this vector intrinsic.
1957 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1958
1959 /// Return the ID of the intrinsic.
1960 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1961
1962 /// Return the scalar return type of the intrinsic.
1963 Type *getResultType() const { return ResultTy; }
1964
1965 /// Return the name of the intrinsic as a string.
1967
1968 /// Returns true if the intrinsic may read from memory.
1969 bool mayReadFromMemory() const { return MayReadFromMemory; }
1970
1971 /// Returns true if the intrinsic may write to memory.
1972 bool mayWriteToMemory() const { return MayWriteToMemory; }
1973
1974 /// Returns true if the intrinsic may have side-effects.
1975 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1976
1977 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1978
1979protected:
1980#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1981 /// Print the recipe.
1982 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1983 VPSlotTracker &SlotTracker) const override;
1984#endif
1985};
1986
1987/// A recipe for widening Call instructions using library calls.
1989 public VPIRMetadata {
1990 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1991 /// between a given VF and the chosen vectorized variant, so there will be a
1992 /// different VPlan for each VF with a valid variant.
1993 Function *Variant;
1994
1995public:
1997 ArrayRef<VPValue *> CallArguments,
1998 const VPIRFlags &Flags = {},
1999 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2000 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
2001 DL),
2002 VPIRMetadata(Metadata), Variant(Variant) {
2003 setUnderlyingValue(UV);
2004 assert(
2005 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2006 "last operand must be the called function");
2007 }
2008
2009 ~VPWidenCallRecipe() override = default;
2010
2012 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2013 *this, *this, getDebugLoc());
2014 }
2015
2016 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2017
2018 /// Produce a widened version of the call instruction.
2019 void execute(VPTransformState &State) override;
2020
2021 /// Return the cost of this VPWidenCallRecipe.
2022 InstructionCost computeCost(ElementCount VF,
2023 VPCostContext &Ctx) const override;
2024
2028
2031
2032protected:
2033#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2034 /// Print the recipe.
2035 void printRecipe(raw_ostream &O, const Twine &Indent,
2036 VPSlotTracker &SlotTracker) const override;
2037#endif
2038};
2039
2040/// A recipe representing a sequence of load -> update -> store as part of
2041/// a histogram operation. This means there may be aliasing between vector
2042/// lanes, which is handled by the llvm.experimental.vector.histogram family
2043/// of intrinsics. The only update operations currently supported are
2044/// 'add' and 'sub' where the other term is loop-invariant.
2046 /// Opcode of the update operation, currently either add or sub.
2047 unsigned Opcode;
2048
2049public:
2050 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2052 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2053 Opcode(Opcode) {}
2054
2055 ~VPHistogramRecipe() override = default;
2056
2058 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2059 }
2060
2061 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2062
2063 /// Produce a vectorized histogram operation.
2064 void execute(VPTransformState &State) override;
2065
2066 /// Return the cost of this VPHistogramRecipe.
2068 VPCostContext &Ctx) const override;
2069
2070 unsigned getOpcode() const { return Opcode; }
2071
2072 /// Return the mask operand if one was provided, or a null pointer if all
2073 /// lanes should be executed unconditionally.
2074 VPValue *getMask() const {
2075 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2076 }
2077
2078protected:
2079#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2080 /// Print the recipe.
2081 void printRecipe(raw_ostream &O, const Twine &Indent,
2082 VPSlotTracker &SlotTracker) const override;
2083#endif
2084};
2085
2086/// A recipe for handling GEP instructions.
2088 Type *SourceElementTy;
2089
/// Returns true if operand 0 (presumably the GEP's base pointer -- confirm) is
/// defined outside of loop regions.
2090 bool isPointerLoopInvariant() const {
2091 return getOperand(0)->isDefinedOutsideLoopRegions();
2092 }
2093
/// Returns true if operand \p I + 1 is defined outside of loop regions. The
/// + 1 skips operand 0, so \p I counts the operands that follow it.
2094 bool isIndexLoopInvariant(unsigned I) const {
2095 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2096 }
2097
2098public:
2100 const VPIRFlags &Flags = {},
2102 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2103 SourceElementTy(GEP->getSourceElementType()) {
2104 setUnderlyingValue(GEP);
2106 (void)Metadata;
2108 assert(Metadata.empty() && "unexpected metadata on GEP");
2109 }
2110
2111 ~VPWidenGEPRecipe() override = default;
2112
2115 operands(), *this, getDebugLoc());
2116 }
2117
2118 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2119
2120 /// This recipe generates a GEP instruction.
2121 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2122
2123 /// Generate the gep nodes.
2124 void execute(VPTransformState &State) override;
2125
2126 Type *getSourceElementType() const { return SourceElementTy; }
2127
2128 /// Return the cost of this VPWidenGEPRecipe.
2130 VPCostContext &Ctx) const override {
2131 // TODO: Compute accurate cost after retiring the legacy cost model.
2132 return 0;
2133 }
2134
2135 /// Returns true if the recipe only uses the first lane of operand \p Op.
2136 bool usesFirstLaneOnly(const VPValue *Op) const override;
2137
2138protected:
2139#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2140 /// Print the recipe.
2141 void printRecipe(raw_ostream &O, const Twine &Indent,
2142 VPSlotTracker &SlotTracker) const override;
2143#endif
2144};
2145
2146/// A recipe to compute a pointer to the last element of each part of a widened
2147/// memory access for widened memory accesses of SourceElementTy. Used for
2148/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2149/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2150/// unroller otherwise.
2152 Type *SourceElementTy;
2153
2154 /// The constant stride of the pointer computed by this recipe, expressed in
2155 /// units of SourceElementTy.
2156 int64_t Stride;
2157
2158public:
/// Create a recipe with operands {\p Ptr, \p VF}, recording \p SourceElementTy
/// and \p Stride. \p GEPFlags and \p DL are forwarded to the
/// VPRecipeWithIRFlags base. Asserts that \p Stride is negative.
2159 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2160 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2161 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2162 GEPFlags, DL),
2163 SourceElementTy(SourceElementTy), Stride(Stride) {
2164 assert(Stride < 0 && "Stride must be negative");
2165 }
2166
2167 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2168
2169 Type *getSourceElementType() const { return SourceElementTy; }
2170 int64_t getStride() const { return Stride; }
2171 VPValue *getPointer() const { return getOperand(0); }
2172 VPValue *getVFValue() const { return getOperand(1); }
2174 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2175 }
2176
2177 /// Adds the offset operand to the recipe.
2178 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2179 void materializeOffset(unsigned Part = 0);
2180
2181 void execute(VPTransformState &State) override;
2182
2183 bool usesFirstLaneOnly(const VPValue *Op) const override {
2185 "Op must be an operand of the recipe");
2186 return true;
2187 }
2188
2189 /// Return the cost of this VPVectorEndPointerRecipe.
2191 VPCostContext &Ctx) const override {
2192 // TODO: Compute accurate cost after retiring the legacy cost model.
2193 return 0;
2194 }
2195
2196 /// Returns true if the recipe only uses the first part of operand \p Op.
2197 bool usesFirstPartOnly(const VPValue *Op) const override {
2199 "Op must be an operand of the recipe");
2200 assert(getNumOperands() <= 2 && "must have at most two operands");
2201 return true;
2202 }
2203
2205 auto *VEPR = new VPVectorEndPointerRecipe(
2208 if (auto *Offset = getOffset())
2209 VEPR->addOperand(Offset);
2210 return VEPR;
2211 }
2212
2213protected:
2214#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2215 /// Print the recipe.
2216 void printRecipe(raw_ostream &O, const Twine &Indent,
2217 VPSlotTracker &SlotTracker) const override;
2218#endif
2219};
2220
2221/// A recipe to compute the pointers for widened memory accesses of \p
2222/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2223/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2225 Type *SourceElementTy;
2226
2227public:
/// Create a recipe whose only initial operand is \p Ptr, recording
/// \p SourceElementTy. \p GEPFlags and \p DL are forwarded to the
/// VPRecipeWithIRFlags base.
2228 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2229 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2230 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2231 SourceElementTy(SourceElementTy) {}
2232
2233 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2234
2236 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2237 }
2238
2239 void execute(VPTransformState &State) override;
2240
2241 Type *getSourceElementType() const { return SourceElementTy; }
2242
2243 bool usesFirstLaneOnly(const VPValue *Op) const override {
2245 "Op must be an operand of the recipe");
2246 return true;
2247 }
2248
2249 /// Returns true if the recipe only uses the first part of operand \p Op.
2250 bool usesFirstPartOnly(const VPValue *Op) const override {
2252 "Op must be an operand of the recipe");
2253 assert(getNumOperands() <= 2 && "must have at most two operands");
2254 return true;
2255 }
2256
2258 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2260 if (auto *Off = getOffset())
2261 Clone->addOperand(Off);
2262 return Clone;
2263 }
2264
2265 /// Return the cost of this VPVectorPointerRecipe.
2267 VPCostContext &Ctx) const override {
2268 // TODO: Compute accurate cost after retiring the legacy cost model.
2269 return 0;
2270 }
2271
2272protected:
2273#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2274 /// Print the recipe.
2275 void printRecipe(raw_ostream &O, const Twine &Indent,
2276 VPSlotTracker &SlotTracker) const override;
2277#endif
2278};
2279
2280/// A pure virtual base class for all recipes modeling header phis, including
2281/// phis for first order recurrences, pointer inductions and reductions. The
2282/// start value is the first operand of the recipe and the incoming value from
2283/// the backedge is the second operand.
2284///
2285/// Inductions are modeled using the following sub-classes:
2286/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2287/// floating point inductions with arbitrary start and step values. Produces
2288/// a vector PHI per-part.
2289/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2290/// pointer induction. Produces either a vector PHI per-part or scalar values
2291/// per-lane based on the canonical induction.
2292/// * VPFirstOrderRecurrencePHIRecipe
2293/// * VPReductionPHIRecipe
2294/// * VPActiveLaneMaskPHIRecipe
2295/// * VPEVLBasedIVPHIRecipe
2296///
2297/// Note that the canonical IV is modeled as a VPRegionValue associated with
2298/// its loop region.
2300 public VPPhiAccessors {
2301protected:
/// Construct a header phi with recipe ID \p VPRecipeID whose only initial
/// operand is \p Start; \p UnderlyingInstr and \p DL are forwarded to
/// VPSingleDefRecipe. Subclasses in this file append any further operands
/// (e.g. the backedge value or the step) via addOperand().
2302 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2303 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2304 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2305
2306 const VPRecipeBase *getAsRecipe() const override { return this; }
2307
2308public:
2309 ~VPHeaderPHIRecipe() override = default;
2310
2311 /// Method to support type inquiry through isa, cast, and dyn_cast.
/// A recipe is a header phi when its recipe ID lies in the inclusive range
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
2312 static inline bool classof(const VPRecipeBase *R) {
2313 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2314 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2315 }
/// Type inquiry for a VPValue: checks the recipe that defines \p V.
/// NOTE(review): the result of getDefiningRecipe() is handed to isa<> without
/// a null check, whereas VPWidenInductionRecipe::classof(const VPValue *)
/// guards against a null defining recipe -- confirm callers never pass a
/// value without a defining recipe.
2316 static inline bool classof(const VPValue *V) {
2317 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2318 }
/// Type inquiry for a VPSingleDefRecipe: upcasts \p R to VPRecipeBase and
/// performs the isa<> check on that pointer.
2319 static inline bool classof(const VPSingleDefRecipe *R) {
2320 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2321 }
2322
2323 /// Generate the phi nodes.
2324 void execute(VPTransformState &State) override = 0;
2325
2326 /// Return the cost of this header phi recipe.
2328 VPCostContext &Ctx) const override;
2329
2330 /// Returns the start value of the phi, if one is set.
2332 return getNumOperands() == 0 ? nullptr : getOperand(0);
2333 }
2335 return getNumOperands() == 0 ? nullptr : getOperand(0);
2336 }
2337
2338 /// Update the start value of the recipe.
2340
2341 /// Returns the incoming value from the loop backedge.
2343 return getOperand(1);
2344 }
2345
2346 /// Update the incoming value from the loop backedge.
2348
2349 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2350 /// to be a recipe.
2352 return *getBackedgeValue()->getDefiningRecipe();
2353 }
2354
2355protected:
2356#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2357 /// Print the recipe.
2358 void printRecipe(raw_ostream &O, const Twine &Indent,
2359 VPSlotTracker &SlotTracker) const override = 0;
2360#endif
2361};
2362
2363/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2364/// VPWidenPointerInductionRecipe), providing shared functionality, including
2365/// retrieving the step value, induction descriptor and original phi node.
2367 const InductionDescriptor &IndDesc;
2368
2369public:
/// Construct a widened induction with recipe ID \p Kind for \p IV. \p Start
/// is passed to the VPHeaderPHIRecipe base and \p Step is appended as an
/// additional operand (getStepValue() reads operand 1). \p IndDesc is stored
/// by reference, so the descriptor must outlive the recipe.
2370 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2371 VPValue *Step, const InductionDescriptor &IndDesc,
2372 DebugLoc DL)
2373 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2374 addOperand(Step);
2375 }
2376
/// Type inquiry: true when \p R has recipe ID VPWidenIntOrFpInductionSC or
/// VPWidenPointerInductionSC.
2377 static inline bool classof(const VPRecipeBase *R) {
2378 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2379 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2380 }
2381
/// Type inquiry for a VPValue: false when \p V has no defining recipe,
/// otherwise the result of the VPRecipeBase check on that recipe.
2382 static inline bool classof(const VPValue *V) {
2383 auto *R = V->getDefiningRecipe();
2384 return R && classof(R);
2385 }
2386
/// Type inquiry for a VPSingleDefRecipe: upcasts \p R to VPRecipeBase and
/// defers to the VPRecipeBase overload.
2387 static inline bool classof(const VPSingleDefRecipe *R) {
2388 return classof(static_cast<const VPRecipeBase *>(R));
2389 }
2390
2391 void execute(VPTransformState &State) override = 0;
2392
2393 /// Returns the start value of the induction.
2395
2396 /// Returns the step value of the induction.
2398 const VPValue *getStepValue() const { return getOperand(1); }
2399
2400 /// Update the step value of the recipe.
2401 void setStepValue(VPValue *V) { setOperand(1, V); }
2402
2404 const VPValue *getVFValue() const { return getOperand(2); }
2405
2406 /// Returns the number of incoming values, also number of incoming blocks.
2407 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2408 /// incoming value, its start value.
2409 unsigned getNumIncoming() const override { return 1; }
2410
2411 /// Returns the underlying PHINode if one exists, or null otherwise.
2415
2416 /// Returns the induction descriptor for the recipe.
2417 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2418
2420 // TODO: All operands of base recipe must exist and be at same index in
2421 // derived recipe.
2423 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2424 }
2425
2427 // TODO: All operands of base recipe must exist and be at same index in
2428 // derived recipe.
2430 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2431 }
2432
2433 /// Returns true if the recipe only uses the first lane of operand \p Op.
2434 bool usesFirstLaneOnly(const VPValue *Op) const override {
2436 "Op must be an operand of the recipe");
2437 // The recipe creates its own wide start value, so it only requests the
2438 // first lane of the operand.
2439 // TODO: Remove once creating the start value is modeled separately.
2440 return Op == getStartValue() || Op == getStepValue();
2441 }
2442};
2443
2444/// A recipe for handling phi nodes of integer and floating-point inductions,
2445/// producing their vector values. This is an abstract recipe and must be
2446/// converted to concrete recipes before executing.
2448 public VPIRFlags {
2449 TruncInst *Trunc;
2450
2451 // If this recipe is unrolled it will have 2 additional operands, i.e. 5 in
// total: the constructors set up 3 (start, step and VF).
2452 bool isUnrolled() const { return getNumOperands() == 5; }
2453
2454public:
2456 VPValue *VF, const InductionDescriptor &IndDesc,
2457 const VPIRFlags &Flags, DebugLoc DL)
2458 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2459 Start, Step, IndDesc, DL),
2460 VPIRFlags(Flags), Trunc(nullptr) {
2461 addOperand(VF);
2462 }
2463
2465 VPValue *VF, const InductionDescriptor &IndDesc,
2466 TruncInst *Trunc, const VPIRFlags &Flags,
2467 DebugLoc DL)
2468 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2469 Start, Step, IndDesc, DL),
2470 VPIRFlags(Flags), Trunc(Trunc) {
2471 addOperand(VF);
2473 (void)Metadata;
2474 if (Trunc)
2476 assert(Metadata.empty() && "unexpected metadata on Trunc");
2477 }
2478
2480
2486
2487 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2488
/// Never executed: this abstract recipe is expected to have been expanded
/// before execution, so reaching this is a bug (llvm_unreachable).
2489 void execute(VPTransformState &State) override {
2490 llvm_unreachable("cannot execute this recipe, should be expanded via "
2491 "expandVPWidenIntOrFpInductionRecipe");
2492 }
2493
2494 /// Returns the start value of the induction.
2496
2497 /// If the recipe has been unrolled, return the VPValue for the induction
2498 /// increment, otherwise return null.
2500 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2501 }
2502
2503 /// Returns the number of incoming values, also number of incoming blocks.
2504 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2505 /// incoming value, its start value.
2506 unsigned getNumIncoming() const override { return 1; }
2507
2508 /// Returns the first defined value as TruncInst, if it is one or nullptr
2509 /// otherwise.
2510 TruncInst *getTruncInst() { return Trunc; }
2511 const TruncInst *getTruncInst() const { return Trunc; }
2512
2513 /// Returns true if the induction is canonical, i.e. starting at 0 and
2514 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2515 /// same type as the canonical induction.
2516 bool isCanonical() const;
2517
2518 /// Returns the scalar type of the induction.
2520 return Trunc ? Trunc->getType() : getStartValue()->getType();
2521 }
2522
2523 /// Returns the VPValue representing the value of this induction at
2524 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2525 /// take place.
2527 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2528 }
2529
2530protected:
2531#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2532 /// Print the recipe.
2533 void printRecipe(raw_ostream &O, const Twine &Indent,
2534 VPSlotTracker &SlotTracker) const override;
2535#endif
2536};
2537
2539public:
2540 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2541 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2542 /// VF*UF.
2544 VPValue *NumUnrolledElems,
2545 const InductionDescriptor &IndDesc, DebugLoc DL)
2546 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2547 Start, Step, IndDesc, DL) {
2548 addOperand(NumUnrolledElems);
2549 }
2550
2552
2558
2559 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2560
2561 /// Never executed: this recipe is expected to have been expanded before
/// execution, so reaching this is a bug (llvm_unreachable). No vector values
/// are generated here.
2562 void execute(VPTransformState &State) override {
2563 llvm_unreachable("cannot execute this recipe, should be expanded via "
2564 "expandVPWidenPointerInduction");
2565 };
2566
2567 /// Returns true if only scalar values will be generated.
2568 bool onlyScalarsGenerated(bool IsScalable);
2569
2570protected:
2571#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2572 /// Print the recipe.
2573 void printRecipe(raw_ostream &O, const Twine &Indent,
2574 VPSlotTracker &SlotTracker) const override;
2575#endif
2576};
2577
2578/// A recipe for widened phis. Incoming values are operands of the recipe and
2579/// their operand index corresponds to the incoming predecessor block. If the
2580/// recipe is placed in an entry block to a (non-replicate) region, it must have
2581/// exactly 2 incoming values, the first from the predecessor of the region and
2582/// the second from the exiting block of the region.
2584 public VPPhiAccessors {
2585 /// Name to use for the generated IR instruction for the widened phi.
2586 std::string Name;
2587
2588public:
2589 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2590 /// debug location \p DL.
2591 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr,
2592 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2593 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, {}, Phi, DL),
2594 Name(Name.str()) {
2595 if (Start)
2596 addOperand(Start);
2597 }
2598
2600 auto *C =
2602 getOperand(0), getDebugLoc(), Name);
2604 C->addOperand(Op);
2605 return C;
2606 }
2607
2608 ~VPWidenPHIRecipe() override = default;
2609
2610 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2611
2612 /// Generate the phi/select nodes.
2613 void execute(VPTransformState &State) override;
2614
2615 /// Return the cost of this VPWidenPHIRecipe.
2617 VPCostContext &Ctx) const override;
2618
2619protected:
2620#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2621 /// Print the recipe.
2622 void printRecipe(raw_ostream &O, const Twine &Indent,
2623 VPSlotTracker &SlotTracker) const override;
2624#endif
2625
2626 const VPRecipeBase *getAsRecipe() const override { return this; }
2627};
2628
2629/// A recipe for handling first-order recurrence phis. The start value is the
2630/// first operand of the recipe and the incoming value from the backedge is the
2631/// second operand.
2634 VPValue &BackedgeValue)
2635 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2636 &Start) {
2637 addOperand(&BackedgeValue);
2638 }
2639
2640 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2641
2646
2647 void execute(VPTransformState &State) override;
2648
2649 /// Return the cost of this first-order recurrence phi recipe.
2651 VPCostContext &Ctx) const override;
2652
2653 /// Returns true if the recipe only uses the first lane of operand \p Op.
2654 bool usesFirstLaneOnly(const VPValue *Op) const override {
2656 "Op must be an operand of the recipe");
2657 return Op == getStartValue();
2658 }
2659
2660protected:
2661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2662 /// Print the recipe.
2663 void printRecipe(raw_ostream &O, const Twine &Indent,
2664 VPSlotTracker &SlotTracker) const override;
2665#endif
2666};
2667
2668/// Possible variants of a reduction.
2669
2670/// This reduction is ordered and in-loop.
2671struct RdxOrdered {};
2672 /// This reduction is in-loop but not ordered (getReductionStyle() returns
/// RdxOrdered instead when the reduction is also ordered).
2673 struct RdxInLoop {};
2674/// This reduction is unordered with the partial result scaled down by some
2675/// factor.
2678};
2679using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2680
/// Map the \p InLoop / \p Ordered flags of a reduction to a ReductionStyle.
/// \p Ordered takes precedence and yields RdxOrdered; otherwise \p InLoop
/// yields RdxInLoop; otherwise the result is RdxUnordered carrying
/// \p ScaleFactor as its VFScaleFactor. \p ScaleFactor is ignored for the
/// first two styles. Asserts that an ordered reduction is also in-loop.
2681 inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2682 unsigned ScaleFactor) {
2683 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2684 if (Ordered)
2685 return RdxOrdered{};
2686 if (InLoop)
2687 return RdxInLoop{};
2688 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2689 }
2690
2691/// A recipe for handling reduction phis. The start value is the first operand
2692/// of the recipe and the incoming value from the backedge is the second
2693/// operand.
2695 /// The recurrence kind of the reduction.
2696 const RecurKind Kind;
2697
2698 ReductionStyle Style;
2699
2700 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2701 /// patterns for argmin/argmax).
2702 /// TODO: Also support cases where the phi itself has a single use, but its
2703 /// compare has multiple uses.
2704 bool HasUsesOutsideReductionChain;
2705
2706public:
2707 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2709 VPValue &BackedgeValue, ReductionStyle Style,
2710 const VPIRFlags &Flags,
2711 bool HasUsesOutsideReductionChain = false)
2712 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2713 VPIRFlags(Flags), Kind(Kind), Style(Style),
2714 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2715 addOperand(&BackedgeValue);
2716 }
2717
2718 ~VPReductionPHIRecipe() override = default;
2719
2721 return new VPReductionPHIRecipe(
2723 *getOperand(0), *getBackedgeValue(), Style, *this,
2724 HasUsesOutsideReductionChain);
2725 }
2726
2727 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2728
2729 /// Generate the phi/select nodes.
2730 void execute(VPTransformState &State) override;
2731
2732 /// Get the factor that the VF of this recipe's output should be scaled by, or
2733 /// 1 if it isn't scaled.
2734 unsigned getVFScaleFactor() const {
2735 auto *Partial = std::get_if<RdxUnordered>(&Style);
2736 return Partial ? Partial->VFScaleFactor : 1;
2737 }
2738
2739 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2740 /// > 1.
/// This replaces the current style with RdxUnordered{ScaleFactor}, whatever
/// the previous style was.
2741 void setVFScaleFactor(unsigned ScaleFactor) {
2742 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2743 Style = RdxUnordered{ScaleFactor};
2744 }
2745
2746 /// Returns the number of incoming values, also number of incoming blocks.
2747 /// A reduction phi has two incoming values: its start value and the value
2748 /// from the loop backedge.
2749 unsigned getNumIncoming() const override { return 2; }
2750
2751 /// Returns the recurrence kind of the reduction.
2752 RecurKind getRecurrenceKind() const { return Kind; }
2753
2754 /// Returns true, if the phi is part of an ordered reduction.
2755 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2756
2757 /// Returns true if the phi is part of an in-loop reduction.
2758 bool isInLoop() const {
2759 return std::holds_alternative<RdxInLoop>(Style) ||
2760 std::holds_alternative<RdxOrdered>(Style);
2761 }
2762
2763 /// Returns true if the reduction outputs a vector with a scaled down VF.
2764 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2765
2766 /// Returns true, if the phi is part of a multi-use reduction.
2768 return HasUsesOutsideReductionChain;
2769 }
2770
2771 /// Returns true if the recipe only uses the first lane of operand \p Op.
2772 bool usesFirstLaneOnly(const VPValue *Op) const override {
2774 "Op must be an operand of the recipe");
2775 return isOrdered() || isInLoop();
2776 }
2777
2778protected:
2779#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2780 /// Print the recipe.
2781 void printRecipe(raw_ostream &O, const Twine &Indent,
2782 VPSlotTracker &SlotTracker) const override;
2783#endif
2784};
2785
2786/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2787/// instructions.
2789public:
2790 /// The blend operation is a User of the incoming values and of their
2791 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2792 /// be omitted (implied by passing an odd number of operands) in which case
2793 /// all other incoming values are merged into it.
2795 const VPIRFlags &Flags, DebugLoc DL)
2796 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2797 assert(Operands.size() >= 2 && "Expected at least two operands!");
2798 setUnderlyingValue(Phi);
2799 }
2800
2801 VPBlendRecipe *clone() override {
2803 operands(), *this, getDebugLoc());
2804 }
2805
2806 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2807
2808 /// A normalized blend is one that has an odd number of operands, whereby the
2809 /// first operand does not have an associated mask.
2810 bool isNormalized() const { return getNumOperands() % 2; }
2811
2812 /// Return the number of incoming values, taking into account when normalized
2813 /// the first incoming value will have no mask.
/// Adding isNormalized() (0 or 1) rounds the odd operand count of a
/// normalized blend up before halving.
2814 unsigned getNumIncomingValues() const {
2815 return (getNumOperands() + isNormalized()) / 2;
2816 }
2817
2818 /// Return incoming value number \p Idx.
/// Value 0 is always operand 0. For \p Idx > 0 the value is operand
/// Idx * 2, shifted down by one when the blend is normalized because the
/// first value then has no mask operand.
2819 VPValue *getIncomingValue(unsigned Idx) const {
2820 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2821 }
2822
2823 /// Return mask number \p Idx.
/// Mask 0 exists only for non-normalized blends, where it is operand 1
/// (asserted). For \p Idx > 0 the mask is operand Idx * 2 when normalized and
/// Idx * 2 + 1 otherwise, i.e. the operand right after its incoming value.
2824 VPValue *getMask(unsigned Idx) const {
2825 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2826 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2827 }
2828
2829 /// Set mask number \p Idx to \p V.
/// Uses the same operand index as getMask(): operand 1 for mask 0 (only valid
/// for non-normalized blends, asserted), otherwise operand Idx * 2 when
/// normalized and Idx * 2 + 1 when not.
2830 void setMask(unsigned Idx, VPValue *V) {
2831 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2832 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2833 }
2834
/// Never executed: blends are expected to have been expanded (see the
/// message below) before execution, so reaching this is a bug.
2835 void execute(VPTransformState &State) override {
2836 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2837 }
2838
2839 /// Return the cost of this VPBlendRecipe.
2840 InstructionCost computeCost(ElementCount VF,
2841 VPCostContext &Ctx) const override;
2842
2843 /// Returns true if the recipe only uses the first lane of operand \p Op.
2844 bool usesFirstLaneOnly(const VPValue *Op) const override;
2845
2846protected:
2847#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2848 /// Print the recipe.
2849 void printRecipe(raw_ostream &O, const Twine &Indent,
2850 VPSlotTracker &SlotTracker) const override;
2851#endif
2852};
2853
2854/// A common base class for interleaved memory operations.
2855/// An Interleaved memory operation is a memory access method that combines
2856/// multiple strided loads/stores into a single wide load/store with shuffles.
2857/// The first operand is the start address. The optional operands are, in order,
2858/// the stored values and the mask.
2860 public VPIRMetadata {
2862
2863 /// Indicates if the interleave group is in a conditional block and requires a
2864 /// mask.
2865 bool HasMask = false;
2866
2867 /// Indicates if gaps between members of the group need to be masked out or if
2868 /// unused gaps can be loaded speculatively.
2869 bool NeedsMaskForGaps = false;
2870
2871protected:
2872 VPInterleaveBase(const unsigned char SC,
2874 ArrayRef<VPValue *> Operands,
2875 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2876 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2877 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2878 NeedsMaskForGaps(NeedsMaskForGaps) {
2879 // TODO: extend the masked interleaved-group support to reversed access.
2880 assert((!Mask || !IG->isReverse()) &&
2881 "Reversed masked interleave-group not supported.");
2882 if (StoredValues.empty()) {
2883 for (unsigned I = 0; I < IG->getFactor(); ++I)
2884 if (Instruction *Inst = IG->getMember(I)) {
2885 assert(!Inst->getType()->isVoidTy() && "must have result");
2886 new VPRecipeValue(this, Inst);
2887 }
2888 } else {
2889 for (auto *SV : StoredValues)
2890 addOperand(SV);
2891 }
2892 if (Mask) {
2893 HasMask = true;
2894 addOperand(Mask);
2895 }
2896 }
2897
2898public:
2899 VPInterleaveBase *clone() override = 0;
2900
/// Type inquiry: true when \p R has recipe ID VPInterleaveSC or
/// VPInterleaveEVLSC.
2901 static inline bool classof(const VPRecipeBase *R) {
2902 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2903 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2904 }
2905
/// Type inquiry for a VPUser: false when \p U is not a VPRecipeBase,
/// otherwise the result of the VPRecipeBase check.
2906 static inline bool classof(const VPUser *U) {
2907 auto *R = dyn_cast<VPRecipeBase>(U);
2908 return R && classof(R);
2909 }
2910
2911 /// Return the address accessed by this recipe.
2912 VPValue *getAddr() const {
2913 return getOperand(0); // Address is the 1st, mandatory operand.
2914 }
2915
2916 /// Return the mask used by this recipe. Note that a full mask is represented
2917 /// by a nullptr.
2918 VPValue *getMask() const {
2919 // Mask is optional and the last operand.
2920 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2921 }
2922
2923 /// Return true if the access needs a mask because of the gaps.
2924 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2925
2927
/// Return the insert position reported by the interleave group.
2928 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2929
/// Must not be called on the common base class; reaching this is treated as
/// unreachable.
2930 void execute(VPTransformState &State) override {
2931 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2932 }
2933
2934 /// Return the cost of this recipe.
2935 InstructionCost computeCost(ElementCount VF,
2936 VPCostContext &Ctx) const override;
2937
2938 /// Returns true if the recipe only uses the first lane of operand \p Op.
2939 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2940
2941 /// Returns the number of stored operands of this interleave group. Returns 0
2942 /// for load interleave groups.
2943 virtual unsigned getNumStoreOperands() const = 0;
2944
2945 /// Return the VPValues stored by this interleave group. If it is a load
2946 /// interleave group, return an empty ArrayRef.
2948 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2950 }
2951};
2952
2953/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2954/// or stores into one wide load/store and shuffles. The first operand of a
2955/// VPInterleave recipe is the address, followed by the stored values, followed
2956/// by an optional mask.
2958public:
2960 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2961 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2962 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2963 Mask, NeedsMaskForGaps, MD, DL) {}
2964
2965 ~VPInterleaveRecipe() override = default;
2966
2970 needsMaskForGaps(), *this, getDebugLoc());
2971 }
2972
2973 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2974
2975 /// Generate the wide load or store, and shuffles.
2976 void execute(VPTransformState &State) override;
2977
2978 bool usesFirstLaneOnly(const VPValue *Op) const override {
2980 "Op must be an operand of the recipe");
2981 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2982 }
2983
/// Returns the number of stored values: all operands except the address and,
/// if present, the trailing mask.
2984 unsigned getNumStoreOperands() const override {
2985 return getNumOperands() - (getMask() ? 2 : 1);
2986 }
2987
2988protected:
2989#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2990 /// Print the recipe.
2991 void printRecipe(raw_ostream &O, const Twine &Indent,
2992 VPSlotTracker &SlotTracker) const override;
2993#endif
2994};
2995
2996/// A recipe for interleaved memory operations with vector-predication
2997/// intrinsics. The first operand is the address, the second operand is the
2998/// explicit vector length. Stored values and mask are optional operands.
3000public:
3002 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3003 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3004 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3005 R.getDebugLoc()) {
3006 assert(!getInterleaveGroup()->isReverse() &&
3007 "Reversed interleave-group with tail folding is not supported.");
3008 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3009 "supported for scalable vector.");
3010 }
3011
3012 ~VPInterleaveEVLRecipe() override = default;
3013
3015 llvm_unreachable("cloning not implemented yet");
3016 }
3017
3018 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3019
3020 /// The VPValue of the explicit vector length.
3021 VPValue *getEVL() const { return getOperand(1); }
3022
3023 /// Generate the wide load or store, and shuffles.
3024 void execute(VPTransformState &State) override;
3025
3026 /// The recipe only uses the first lane of the address, and EVL operand.
3027 bool usesFirstLaneOnly(const VPValue *Op) const override {
3029 "Op must be an operand of the recipe");
3030 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3031 Op == getEVL();
3032 }
3033
/// Returns the number of stored values: all operands except the address, the
/// EVL and, if present, the trailing mask.
3034 unsigned getNumStoreOperands() const override {
3035 return getNumOperands() - (getMask() ? 3 : 2);
3036 }
3037
3038protected:
3039#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3040 /// Print the recipe.
3041 void printRecipe(raw_ostream &O, const Twine &Indent,
3042 VPSlotTracker &SlotTracker) const override;
3043#endif
3044};
3045
3046/// A recipe to represent inloop, ordered or partial reduction operations. It
3047/// performs a reduction on a vector operand into a scalar (vector in the case
3048/// of a partial reduction) value, and adds the result to a chain. The Operands
3049/// are {ChainOp, VecOp, [Condition]}.
3051
3052 /// The recurrence kind for the reduction in question.
3053 RecurKind RdxKind;
3054 /// Whether the reduction is conditional.
3055 bool IsConditional = false;
3056 ReductionStyle Style;
3057
3058protected:
3059 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3061 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3062 ReductionStyle Style, DebugLoc DL)
3063 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3064 Style(Style) {
3065 if (CondOp) {
3066 IsConditional = true;
3067 addOperand(CondOp);
3068 }
3070 }
3071
3072public:
3074 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3076 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3077 {ChainOp, VecOp}, CondOp, Style, DL) {}
3078
3080 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3082 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3083 {ChainOp, VecOp}, CondOp, Style, DL) {}
3084
3085 ~VPReductionRecipe() override = default;
3086
3088 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3090 getCondOp(), Style, getDebugLoc());
3091 }
3092
3093 static inline bool classof(const VPRecipeBase *R) {
3094 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3095 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3096 }
3097
3098 static inline bool classof(const VPUser *U) {
3099 auto *R = dyn_cast<VPRecipeBase>(U);
3100 return R && classof(R);
3101 }
3102
3103 static inline bool classof(const VPValue *VPV) {
3104 const VPRecipeBase *R = VPV->getDefiningRecipe();
3105 return R && classof(R);
3106 }
3107
3108 static inline bool classof(const VPSingleDefRecipe *R) {
3109 return classof(static_cast<const VPRecipeBase *>(R));
3110 }
3111
3112 /// Generate the reduction in the loop.
3113 void execute(VPTransformState &State) override;
3114
3115 /// Return the cost of VPReductionRecipe.
3116 InstructionCost computeCost(ElementCount VF,
3117 VPCostContext &Ctx) const override;
3118
3119 /// Return the recurrence kind for the in-loop reduction.
3120 RecurKind getRecurrenceKind() const { return RdxKind; }
3121 /// Return true if the in-loop reduction is ordered.
3122 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3123 /// Return true if the in-loop reduction is conditional.
3124 bool isConditional() const { return IsConditional; };
3125 /// Returns true if the reduction outputs a vector with a scaled down VF.
3126 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3127 /// Returns true if the reduction is in-loop.
3128 bool isInLoop() const {
3129 return std::holds_alternative<RdxInLoop>(Style) ||
3130 std::holds_alternative<RdxOrdered>(Style);
3131 }
3132 /// The VPValue of the scalar Chain being accumulated.
3133 VPValue *getChainOp() const { return getOperand(0); }
3134 /// The VPValue of the vector value to be reduced.
3135 VPValue *getVecOp() const { return getOperand(1); }
3136 /// The VPValue of the condition for the block.
3138 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3139 }
3140 /// Get the factor that the VF of this recipe's output should be scaled by, or
3141 /// 1 if it isn't scaled.
3142 unsigned getVFScaleFactor() const {
3143 auto *Partial = std::get_if<RdxUnordered>(&Style);
3144 return Partial ? Partial->VFScaleFactor : 1;
3145 }
3146
3147protected:
3148#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3149 /// Print the recipe.
3150 void printRecipe(raw_ostream &O, const Twine &Indent,
3151 VPSlotTracker &SlotTracker) const override;
3152#endif
3153};
3154
3155/// A recipe to represent inloop reduction operations with vector-predication
3156/// intrinsics, performing a reduction on a vector operand with the explicit
3157/// vector length (EVL) into a scalar value, and adding the result to a chain.
3158/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3160public:
3163 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3164 R.getFastMathFlags(),
3166 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3167 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3168 DL) {}
3169
3170 ~VPReductionEVLRecipe() override = default;
3171
3173 llvm_unreachable("cloning not implemented yet");
3174 }
3175
3176 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3177
3178 /// Generate the reduction in the loop
3179 void execute(VPTransformState &State) override;
3180
3181 /// The VPValue of the explicit vector length.
3182 VPValue *getEVL() const { return getOperand(2); }
3183
3184 /// Returns true if the recipe only uses the first lane of operand \p Op.
3185 bool usesFirstLaneOnly(const VPValue *Op) const override {
3187 "Op must be an operand of the recipe");
3188 return Op == getEVL();
3189 }
3190
3191protected:
3192#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3193 /// Print the recipe.
3194 void printRecipe(raw_ostream &O, const Twine &Indent,
3195 VPSlotTracker &SlotTracker) const override;
3196#endif
3197};
3198
3199/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3200/// copies of the original scalar type, one per lane, instead of producing a
3201/// single copy of widened type for all lanes. If the instruction is known to be
3202/// a single scalar, only one copy will be generated.
3204 public VPIRMetadata {
3205 /// Indicator if a single scalar copy suffices instead of one replica per lane.
3206 bool IsSingleScalar;
3207
3208 /// Indicator if the replicas are also predicated.
3209 bool IsPredicated;
3210
3211public:
3213 bool IsSingleScalar, VPValue *Mask = nullptr,
3214 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3215 DebugLoc DL = DebugLoc::getUnknown())
3216 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3217 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3218 IsPredicated(Mask) {
3219 setUnderlyingValue(I);
3220 if (Mask)
3221 addOperand(Mask);
3222 }
3223
3224 ~VPReplicateRecipe() override = default;
3225
3227 auto *Copy = new VPReplicateRecipe(
3228 getUnderlyingInstr(), operands(), IsSingleScalar,
3229 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3230 Copy->transferFlags(*this);
3231 return Copy;
3232 }
3233
3234 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3235
3236 /// Generate replicas of the desired Ingredient. Replicas will be generated
3237 /// for all parts and lanes unless a specific part and lane are specified in
3238 /// the \p State.
3239 void execute(VPTransformState &State) override;
3240
3241 /// Return the cost of this VPReplicateRecipe.
3242 InstructionCost computeCost(ElementCount VF,
3243 VPCostContext &Ctx) const override;
3244
/// Returns true if the recipe was created as a single scalar, i.e. only one
/// copy is generated rather than one per lane.
3245 bool isSingleScalar() const { return IsSingleScalar; }
3246
/// Returns true if a mask was supplied at construction; the mask is then the
/// last operand of the recipe.
3247 bool isPredicated() const { return IsPredicated; }
3248
3249 /// Returns true if the recipe only uses the first lane of operand \p Op.
3250 bool usesFirstLaneOnly(const VPValue *Op) const override {
3252 "Op must be an operand of the recipe");
3253 return isSingleScalar();
3254 }
3255
3256 /// Returns true if the recipe uses scalars of operand \p Op.
3257 bool usesScalars(const VPValue *Op) const override {
3259 "Op must be an operand of the recipe");
3260 return true;
3261 }
3262
3263 /// Returns true if the recipe is used by a widened recipe via an intervening
3264 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
3265 /// in a vector.
3266 bool shouldPack() const;
3267
3268 /// Return the mask of a predicated VPReplicateRecipe.
3270 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3271 return getOperand(getNumOperands() - 1);
3272 }
3273
/// Return the opcode of the underlying instruction.
3274 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3275
3276protected:
3277#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3278 /// Print the recipe.
3279 void printRecipe(raw_ostream &O, const Twine &Indent,
3280 VPSlotTracker &SlotTracker) const override;
3281#endif
3282};
3283
3284/// A recipe for generating conditional branches on the bits of a mask.
3286public:
3288 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3289
3292 }
3293
3294 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3295
3296 /// Generate the extraction of the appropriate bit from the block mask and the
3297 /// conditional branch.
3298 void execute(VPTransformState &State) override;
3299
3300 /// Return the cost of this VPBranchOnMaskRecipe.
3301 InstructionCost computeCost(ElementCount VF,
3302 VPCostContext &Ctx) const override;
3303
3304#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3305 /// Print the recipe.
3306 void printRecipe(raw_ostream &O, const Twine &Indent,
3307 VPSlotTracker &SlotTracker) const override {
3308 O << Indent << "BRANCH-ON-MASK ";
3310 }
3311#endif
3312
3313 /// Returns true if the recipe uses scalars of operand \p Op.
3314 bool usesScalars(const VPValue *Op) const override {
3316 "Op must be an operand of the recipe");
3317 return true;
3318 }
3319};
3320
3321/// A recipe to combine multiple recipes into a single 'expression' recipe,
3322/// which should be considered a single entity for cost-modeling and transforms.
3323/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3324/// expression recipes, before execute. The individual expression recipes are
3325/// completely disconnected from the def-use graph of other recipes not part of
3326/// the expression. Def-use edges between pairs of expression recipes remain
3327/// intact, whereas every edge between an expression recipe and a recipe outside
3328/// the expression is elevated to connect the non-expression recipe with the
3329/// VPExpressionRecipe itself.
3330class VPExpressionRecipe : public VPSingleDefRecipe {
3331 /// Recipes included in this VPExpressionRecipe. This could contain
3332 /// duplicates.
3333 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3334
3335 /// Temporary VPValues used for external operands of the expression, i.e.
3336 /// operands not defined by recipes in the expression.
3337 SmallVector<VPValue *> LiveInPlaceholders;
3338
3339 enum class ExpressionTypes {
3340 /// Represents an inloop extended reduction operation, performing a
3341 /// reduction on an extended vector operand into a scalar value, and adding
3342 /// the result to a chain.
3343 ExtendedReduction,
3344 /// Represent an inloop multiply-accumulate reduction, multiplying the
3345 /// extended vector operands, performing a reduction.add on the result, and
3346 /// adding the scalar result to a chain.
3347 ExtMulAccReduction,
3348 /// Represent an inloop multiply-accumulate reduction, multiplying the
3349 /// vector operands, performing a reduction.add on the result, and adding
3350 /// the scalar result to a chain.
3351 MulAccReduction,
3352 /// Represent an inloop multiply-accumulate reduction, multiplying the
3353 /// extended vector operands, negating the multiplication, performing a
3354 /// reduction.add on the result, and adding the scalar result to a chain.
3355 ExtNegatedMulAccReduction,
3356 };
3357
3358 /// Type of the expression.
3359 ExpressionTypes ExpressionType;
3360
3361 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3362 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3363 /// in the expression) are replaced by temporary VPValues and the original
3364 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3365 /// as needed (excluding last) to ensure they are only used by other recipes
3366 /// in the expression.
3367 VPExpressionRecipe(ExpressionTypes ExpressionType,
3368 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3369
3370public:
3372 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3374 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3377 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3378 {Ext0, Ext1, Mul, Red}) {}
3381 VPReductionRecipe *Red)
3382 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3383 {Ext0, Ext1, Mul, Sub, Red}) {
3384 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3385 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3386 "Expected an add reduction");
3387 assert(getNumOperands() >= 3 && "Expected at least three operands");
3388 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3389 assert(SubConst && SubConst->isZero() &&
3390 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3391 }
3392
3394 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3395 for (auto *R : reverse(ExpressionRecipes)) {
3396 if (ExpressionRecipesSeen.insert(R).second)
3397 delete R;
3398 }
3399 for (VPValue *T : LiveInPlaceholders)
3400 delete T;
3401 }
3402
3403 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3404
/// Clone the expression. Every recipe in ExpressionRecipes is cloned, uses of
/// the original expression recipes inside the clones are redirected to the
/// corresponding clones, and placeholder operands are replaced by the
/// operands of this recipe. A new VPExpressionRecipe of the same
/// ExpressionType is then constructed from the clones. Must not be called on
/// an empty (decomposed) expression.
3405 VPExpressionRecipe *clone() override {
3406 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3407 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3408 for (auto *R : ExpressionRecipes)
3409 NewExpressiondRecipes.push_back(R->clone());
3410 for (auto *New : NewExpressiondRecipes) {
// Redirect uses of the original expression recipes to their clones; the
// clone of ExpressionRecipes[Idx] is NewExpressiondRecipes[Idx].
3411 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3412 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3413 // Update placeholder operands in the cloned recipe to use the external
3414 // operands, to be internalized when the cloned expression is constructed.
3415 for (const auto &[Placeholder, OutsideOp] :
3416 zip(LiveInPlaceholders, operands()))
3417 New->replaceUsesOfWith(Placeholder, OutsideOp);
3418 }
3419 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3420 }
3421
3422 /// Return the VPValue to use to infer the result type of the recipe.
3424 unsigned OpIdx =
3425 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3426 : 1;
3427 return getOperand(getNumOperands() - OpIdx);
3428 }
3429
3430 /// Insert the recipes of the expression back into the VPlan, directly before
3431 /// the current recipe. Leaves the expression recipe empty, which must be
3432 /// removed before codegen.
3433 void decompose();
3434
/// Return the VF scale factor of the last recipe of the expression if it is a
/// VPReductionRecipe, and 1 otherwise.
3435 unsigned getVFScaleFactor() const {
3436 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3437 return PR ? PR->getVFScaleFactor() : 1;
3438 }
3439
3440 /// Must not be called; the expression has to be decomposed and removed first.
3441 void execute(VPTransformState &State) override {
3442 llvm_unreachable("recipe must be removed before execute");
3443 }
3444
3446 VPCostContext &Ctx) const override;
3447
3448 /// Returns true if this expression contains recipes that may read from or
3449 /// write to memory.
3450 bool mayReadOrWriteMemory() const;
3451
3452 /// Returns true if this expression contains recipes that may have side
3453 /// effects.
3454 bool mayHaveSideEffects() const;
3455
3456 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3457 bool isSingleScalar() const;
3458
3459protected:
3460#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3461 /// Print the recipe.
3462 void printRecipe(raw_ostream &O, const Twine &Indent,
3463 VPSlotTracker &SlotTracker) const override;
3464#endif
3465};
3466
3467/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3468/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3469/// order to merge values that are set under such a branch and feed their uses.
3470/// The phi nodes can be scalar or vector depending on the users of the value.
3471/// This recipe works in concert with VPBranchOnMaskRecipe.
3473public:
3474 /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
3475 /// nodes after merging back from a Branch-on-Mask.
3477 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
3478 ~VPPredInstPHIRecipe() override = default;
3479
3481 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3482 }
3483
3484 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3485
3486 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3487 /// retain SSA form.
3488 void execute(VPTransformState &State) override;
3489
3490 /// Return the cost of this VPPredInstPHIRecipe.
3492 VPCostContext &Ctx) const override {
3493 // TODO: Compute accurate cost after retiring the legacy cost model.
3494 return 0;
3495 }
3496
3497 /// Returns true if the recipe uses scalars of operand \p Op.
3498 bool usesScalars(const VPValue *Op) const override {
3500 "Op must be an operand of the recipe");
3501 return true;
3502 }
3503
3504protected:
3505#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3506 /// Print the recipe.
3507 void printRecipe(raw_ostream &O, const Twine &Indent,
3508 VPSlotTracker &SlotTracker) const override;
3509#endif
3510};
3511
3512/// A common base class for widening memory operations. An optional mask can be
3513/// provided as the last operand.
3515 public VPIRMetadata {
3516protected:
3518
3519 /// Alignment information for this memory access.
3521
3522 /// Whether the accessed addresses are consecutive.
3524
3525 /// Whether the memory access is masked.
3526 bool IsMasked = false;
3527
/// Append \p Mask as the last operand and mark the recipe as masked. A null
/// \p Mask is ignored and leaves the recipe unmasked. Asserts that no mask
/// has been set before.
3528 void setMask(VPValue *Mask) {
3529 assert(!IsMasked && "cannot re-set mask");
3530 if (!Mask)
3531 return;
3532 addOperand(Mask);
3533 IsMasked = true;
3534 }
3535
3536 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3537 std::initializer_list<VPValue *> Operands,
3538 bool Consecutive, const VPIRMetadata &Metadata,
3539 DebugLoc DL)
3540 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3542
3543public:
3545 llvm_unreachable("cloning not supported");
3546 }
3547
3548 static inline bool classof(const VPRecipeBase *R) {
3549 return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3550 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3551 R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3552 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3553 }
3554
3555 static inline bool classof(const VPUser *U) {
3556 auto *R = dyn_cast<VPRecipeBase>(U);
3557 return R && classof(R);
3558 }
3559
3560 /// Return whether the loaded-from / stored-to addresses are consecutive.
3561 bool isConsecutive() const { return Consecutive; }
3562
3563 /// Return the address accessed by this recipe.
3564 VPValue *getAddr() const { return getOperand(0); }
3565
3566 /// Returns true if the recipe is masked.
3567 bool isMasked() const { return IsMasked; }
3568
3569 /// Return the mask used by this recipe. Note that a full mask is represented
3570 /// by a nullptr.
3571 VPValue *getMask() const {
3572 // Mask is optional and therefore the last operand.
3573 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3574 }
3575
3576 /// Returns the alignment of the memory access.
3577 Align getAlign() const { return Alignment; }
3578
3579 /// Generate the wide load/store.
3580 void execute(VPTransformState &State) override {
3581 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3582 }
3583
3584 /// Return the cost of this VPWidenMemoryRecipe.
3585 InstructionCost computeCost(ElementCount VF,
3586 VPCostContext &Ctx) const override;
3587
3589};
3590
3591/// A recipe for widening load operations, using the address to load from and an
3592/// optional mask.
3594 public VPRecipeValue {
3596 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3597 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3598 Consecutive, Metadata, DL),
3599 VPRecipeValue(this, &Load) {
3600 setMask(Mask);
3601 }
3602
3605 getMask(), Consecutive, *this, getDebugLoc());
3606 }
3607
3608 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3609
3610 /// Generate a wide load or gather.
3611 void execute(VPTransformState &State) override;
3612
3613 /// Returns true if the recipe only uses the first lane of operand \p Op.
3614 bool usesFirstLaneOnly(const VPValue *Op) const override {
3616 "Op must be an operand of the recipe");
3617 // Widened, consecutive loads operations only demand the first lane of
3618 // their address.
3619 return Op == getAddr() && isConsecutive();
3620 }
3621
3622protected:
3623#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3624 /// Print the recipe.
3625 void printRecipe(raw_ostream &O, const Twine &Indent,
3626 VPSlotTracker &SlotTracker) const override;
3627#endif
3628};
3629
3630/// A recipe for widening load operations with vector-predication intrinsics,
3631/// using the address to load from, the explicit vector length and an optional
3632/// mask.
3634 public VPRecipeValue {
3636 VPValue *Mask)
3637 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
3638 {Addr, &EVL}, L.isConsecutive(), L,
3639 L.getDebugLoc()),
3640 VPRecipeValue(this, &getIngredient()) {
3641 setMask(Mask);
3642 }
3643
3644 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3645
3646 /// Return the EVL operand.
3647 VPValue *getEVL() const { return getOperand(1); }
3648
3649 /// Generate the wide load or gather.
3650 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3651
3652 /// Return the cost of this VPWidenLoadEVLRecipe.
3654 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3655
3656 /// Returns true if the recipe only uses the first lane of operand \p Op.
3657 bool usesFirstLaneOnly(const VPValue *Op) const override {
3659 "Op must be an operand of the recipe");
3660 // Widened loads only demand the first lane of EVL and consecutive loads
3661 // only demand the first lane of their address.
3662 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3663 }
3664
3665protected:
3666#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3667 /// Print the recipe.
3668 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3669 VPSlotTracker &SlotTracker) const override;
3670#endif
3671};
3672
3673/// A recipe for widening store operations, using the stored value, the address
3674/// to store to and an optional mask.
3676 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3677 VPValue *Mask, bool Consecutive,
3678 const VPIRMetadata &Metadata, DebugLoc DL)
3679 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3680 {Addr, StoredVal}, Consecutive, Metadata, DL) {
3681 setMask(Mask);
3682 }
3683
3687 *this, getDebugLoc());
3688 }
3689
3690 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3691
3692 /// Return the value stored by this recipe.
3693 VPValue *getStoredValue() const { return getOperand(1); }
3694
3695 /// Generate a wide store or scatter.
3696 void execute(VPTransformState &State) override;
3697
3698 /// Returns true if the recipe only uses the first lane of operand \p Op.
3699 bool usesFirstLaneOnly(const VPValue *Op) const override {
3701 "Op must be an operand of the recipe");
3702 // Widened, consecutive stores only demand the first lane of their address,
3703 // unless the same operand is also stored.
3704 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3705 }
3706
3707protected:
3708#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3709 /// Print the recipe.
3710 void printRecipe(raw_ostream &O, const Twine &Indent,
3711 VPSlotTracker &SlotTracker) const override;
3712#endif
3713};
3714
3715/// A recipe for widening store operations with vector-predication intrinsics,
3716/// using the value to store, the address to store to, the explicit vector
3717/// length and an optional mask.
3720 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3721 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
3722 {Addr, StoredVal, &EVL}, S.isConsecutive(), S,
3723 S.getDebugLoc()) {
3724 setMask(Mask);
3725 }
3726
3727 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3728
3729 /// Return the value stored by this recipe.
3730 VPValue *getStoredValue() const { return getOperand(1); }
3731
3732 /// Return the EVL operand.
3733 VPValue *getEVL() const { return getOperand(2); }
3734
3735 /// Generate the wide store or scatter.
3736 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3737
3738 /// Return the cost of this VPWidenStoreEVLRecipe.
3740 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3741
3742 /// Returns true if the recipe only uses the first lane of operand \p Op.
3743 bool usesFirstLaneOnly(const VPValue *Op) const override {
3745 "Op must be an operand of the recipe");
3746 if (Op == getEVL()) {
3747 assert(getStoredValue() != Op && "unexpected store of EVL");
3748 return true;
3749 }
3750 // Widened, consecutive memory operations only demand the first lane of
3751 // their address, unless the same operand is also stored. The latter can
3752 // happen with opaque pointers.
3753 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3754 }
3755
3756protected:
3757#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3758 /// Print the recipe.
3759 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3760 VPSlotTracker &SlotTracker) const override;
3761#endif
3762};
3763
3764/// Recipe to expand a SCEV expression.
3766 const SCEV *Expr;
3767
3768public:
3770 : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}
3771
3772 ~VPExpandSCEVRecipe() override = default;
3773
3774 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3775
3776 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3777
/// Must not be called; SCEV expansion recipes have to be expanded before the
/// plan is executed.
3778 void execute(VPTransformState &State) override {
3779 llvm_unreachable("SCEV expressions must be expanded before final execute");
3780 }
3781
3782 /// Return the cost of this VPExpandSCEVRecipe.
3784 VPCostContext &Ctx) const override {
3785 // TODO: Compute accurate cost after retiring the legacy cost model.
3786 return 0;
3787 }
3788
/// Return the SCEV expression held by this recipe.
3789 const SCEV *getSCEV() const { return Expr; }
3790
3791protected:
3792#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3793 /// Print the recipe.
3794 void printRecipe(raw_ostream &O, const Twine &Indent,
3795 VPSlotTracker &SlotTracker) const override;
3796#endif
3797};
3798
3799/// A recipe for generating the active lane mask for the vector loop that is
3800/// used to predicate the vector operations.
3802public:
3804 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3805 StartMask, DL) {}
3806
3807 ~VPActiveLaneMaskPHIRecipe() override = default;
3808
3811 if (getNumOperands() == 2)
3812 R->addOperand(getOperand(1));
3813 return R;
3814 }
3815
3816 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3817
3818 /// Generate the active lane mask phi of the vector loop.
3819 void execute(VPTransformState &State) override;
3820
3821protected:
3822#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3823 /// Print the recipe.
3824 void printRecipe(raw_ostream &O, const Twine &Indent,
3825 VPSlotTracker &SlotTracker) const override;
3826#endif
3827};
3828
3829/// A recipe for generating the phi node tracking the current scalar iteration
3830/// index. It starts at the start value of the canonical induction and gets
3831/// incremented by the number of scalar iterations processed by the vector loop
3832/// iteration. The increment does not have to be loop invariant.
3834public:
3836 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3837 StartIV, DL) {}
3838
3839 ~VPCurrentIterationPHIRecipe() override = default;
3840
3842 llvm_unreachable("cloning not implemented yet");
3843 }
3844
3845 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3846
3847 void execute(VPTransformState &State) override {
3848 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3849 "scalar phi recipe");
3850 }
3851
3852 /// Return the cost of this VPCurrentIterationPHIRecipe.
3854 VPCostContext &Ctx) const override {
3855 // For now, match the behavior of the legacy cost model.
3856 return 0;
3857 }
3858
3859 /// Returns true if the recipe only uses the first lane of operand \p Op.
3860 bool usesFirstLaneOnly(const VPValue *Op) const override {
3862 "Op must be an operand of the recipe");
3863 return true;
3864 }
3865
3866protected:
3867#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3868 /// Print the recipe.
3869 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3870 VPSlotTracker &SlotTracker) const override;
3871#endif
3872};
3873
3874/// A Recipe for widening the canonical induction variable of the vector loop.
3876 public VPUnrollPartAccessor<1> {
3877public:
3879 : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3880
3881 ~VPWidenCanonicalIVRecipe() override = default;
3882
3886
3887 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
3888
3889 /// Generate a canonical vector induction variable of the vector loop, with
3890 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3891 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3892 void execute(VPTransformState &State) override;
3893
3894 /// Return the cost of this VPWidenCanonicalIVRecipe.
3896 VPCostContext &Ctx) const override {
3897 // TODO: Compute accurate cost after retiring the legacy cost model.
3898 return 0;
3899 }
3900
3901 /// Return the canonical IV being widened.
3905
3906protected:
3907#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3908 /// Print the recipe.
3909 void printRecipe(raw_ostream &O, const Twine &Indent,
3910 VPSlotTracker &SlotTracker) const override;
3911#endif
3912};
3913
3914/// A recipe for converting the input value \p IV value to the corresponding
3915/// value of an IV with different start and step values, using Start + IV *
3916/// Step.
3918 /// Kind of the induction.
3920 /// If not nullptr, the floating point induction binary operator. Must be set
3921 /// for floating point inductions.
3922 const FPMathOperator *FPBinOp;
3923
3924 /// Name to use for the generated IR instruction for the derived IV.
3925 std::string Name;
3926
3927public:
3929 VPValue *CanonicalIV, VPValue *Step, const Twine &Name = "")
3931 IndDesc.getKind(),
3932 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3933 Start, CanonicalIV, Step, Name) {}
3934
3936 const FPMathOperator *FPBinOp, VPIRValue *Start,
3937 VPValue *IV, VPValue *Step, const Twine &Name = "")
3938 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
3939 Kind(Kind), FPBinOp(FPBinOp), Name(Name.str()) {}
3940
3941 ~VPDerivedIVRecipe() override = default;
3942
3944 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3945 getStepValue());
3946 }
3947
3948 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
3949
3950 /// Generate the transformed value of the induction at offset StartValue (1.
3951 /// operand) + IV (2. operand) * StepValue (3. operand).
3952 void execute(VPTransformState &State) override;
3953
3954 /// Return the cost of this VPDerivedIVRecipe.
3956 VPCostContext &Ctx) const override {
3957 // TODO: Compute accurate cost after retiring the legacy cost model.
3958 return 0;
3959 }
3960
3961 Type *getScalarType() const { return getStartValue()->getType(); }
3962
3964 VPValue *getStepValue() const { return getOperand(2); }
3965
3966 /// Returns true if the recipe only uses the first lane of operand \p Op.
3967 bool usesFirstLaneOnly(const VPValue *Op) const override {
3969 "Op must be an operand of the recipe");
3970 return true;
3971 }
3972
3973protected:
3974#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3975 /// Print the recipe.
3976 void printRecipe(raw_ostream &O, const Twine &Indent,
3977 VPSlotTracker &SlotTracker) const override;
3978#endif
3979};
3980
3981/// A recipe for handling phi nodes of integer and floating-point inductions,
3982/// producing their scalar values. Before unrolling by UF the recipe represents
3983/// the VF*UF scalar values to be produced, or UF scalar values if only first
3984/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
3985/// operand StartIndex to all unroll parts except part 0, as the recipe
3986/// represents the VF scalar values (this number of values is taken from
3987/// State.VF rather than from the VF operand) starting at IV + StartIndex.
3989 Instruction::BinaryOps InductionOpcode;
3990
3991public:
3994 DebugLoc DL)
3995 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
3996 FMFs, DL),
3997 InductionOpcode(Opcode) {}
3998
4000 VPValue *Step, VPValue *VF,
4003 IV, Step, VF, IndDesc.getInductionOpcode(),
4004 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4005 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4006 : FastMathFlags(),
4007 DL) {}
4008
4009 ~VPScalarIVStepsRecipe() override = default;
4010
4012 auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
4013 getOperand(2), InductionOpcode,
4015 if (VPValue *StartIndex = getStartIndex())
4016 NewR->setStartIndex(StartIndex);
4017 return NewR;
4018 }
4019
4020 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4021
4022 /// Generate the scalarized versions of the phi node as needed by their users.
4023 void execute(VPTransformState &State) override;
4024
4025 /// Return the cost of this VPScalarIVStepsRecipe.
4027 VPCostContext &Ctx) const override {
4028 // TODO: Compute accurate cost after retiring the legacy cost model.
4029 return 0;
4030 }
4031
4032 VPValue *getStepValue() const { return getOperand(1); }
4033
4034 /// Return the number of scalars to produce per unroll part, used to compute
4035 /// StartIndex during unrolling.
4036 VPValue *getVFValue() const { return getOperand(2); }
4037
4038 /// Return the StartIndex, or null if known to be zero, valid only after
4039 /// unrolling.
4041 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4042 }
4043
4044 /// Set or add the StartIndex operand.
4045 void setStartIndex(VPValue *StartIndex) {
4046 if (getNumOperands() == 4)
4047 setOperand(3, StartIndex);
4048 else
4049 addOperand(StartIndex);
4050 }
4051
4052 /// Returns true if the recipe only uses the first lane of operand \p Op.
4053 bool usesFirstLaneOnly(const VPValue *Op) const override {
4055 "Op must be an operand of the recipe");
4056 return true;
4057 }
4058
4059 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4060
4061protected:
4062#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4063 /// Print the recipe.
4064 void printRecipe(raw_ostream &O, const Twine &Indent,
4065 VPSlotTracker &SlotTracker) const override;
4066#endif
4067};
4068
4069/// Support casting from VPRecipeBase -> VPPhiAccessors.
4070template <>
4074 /// Used by isa.
4075 static inline bool isPossible(VPRecipeBase *R) {
4076 // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
4078 }
4079
4080 /// Used by cast.
4082 switch (R->getVPRecipeID()) {
4083 case VPRecipeBase::VPInstructionSC:
4084 return cast<VPPhi>(R);
4085 case VPRecipeBase::VPIRInstructionSC:
4086 return cast<VPIRPhi>(R);
4087 case VPRecipeBase::VPWidenPHISC:
4088 return cast<VPWidenPHIRecipe>(R);
4089 default:
4090 return cast<VPHeaderPHIRecipe>(R);
4091 }
4092 }
4093
4094 /// Used by inherited doCastIfPossible to dyn_cast.
4095 static inline VPPhiAccessors *castFailed() { return nullptr; }
4096};
4097
4098template <>
4103template <>
4105 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4106 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4107
4108/// Support casting from VPRecipeBase -> VPIRMetadata.
4109template <>
4113 /// Used by isa.
4114 static inline bool isPossible(VPRecipeBase *R) {
4115 // NOTE: Each recipe inheriting from VPIRMetadata must be listed here.
4120 R);
4121 }
4122
4123 /// Used by cast.
4124 static inline VPIRMetadata *doCast(VPRecipeBase *R) {
4125 switch (R->getVPRecipeID()) {
4126 case VPRecipeBase::VPInstructionSC:
4127 return cast<VPInstruction>(R);
4128 case VPRecipeBase::VPWidenSC:
4129 return cast<VPWidenRecipe>(R);
4130 case VPRecipeBase::VPWidenCastSC:
4131 return cast<VPWidenCastRecipe>(R);
4132 case VPRecipeBase::VPWidenIntrinsicSC:
4134 case VPRecipeBase::VPWidenCallSC:
4135 return cast<VPWidenCallRecipe>(R);
4136 case VPRecipeBase::VPReplicateSC:
4137 return cast<VPReplicateRecipe>(R);
4138 case VPRecipeBase::VPInterleaveSC:
4139 case VPRecipeBase::VPInterleaveEVLSC:
4140 return cast<VPInterleaveBase>(R);
4141 case VPRecipeBase::VPWidenLoadSC:
4142 case VPRecipeBase::VPWidenLoadEVLSC:
4143 case VPRecipeBase::VPWidenStoreSC:
4144 case VPRecipeBase::VPWidenStoreEVLSC:
4145 return cast<VPWidenMemoryRecipe>(R);
4146 default:
4147 llvm_unreachable("Illegal recipe for VPIRMetadata cast");
4148 }
4149 }
4150
4151 /// Used by inherited doCastIfPossible to dyn_cast.
4152 static inline VPIRMetadata *castFailed() { return nullptr; }
4153};
4154
4155template <>
4160template <>
4162 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4163 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4164
4165/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4166/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4167/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4168class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4169 friend class VPlan;
4170
4171 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4172 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4173 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4174 if (Recipe)
4175 appendRecipe(Recipe);
4176 }
4177
4178public:
4180
4181protected:
4182 /// The VPRecipes held in the order of output instructions to generate.
4184
4185 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4186 : VPBlockBase(BlockSC, Name.str()) {}
4187
4188public:
4189 ~VPBasicBlock() override {
4190 while (!Recipes.empty())
4191 Recipes.pop_back();
4192 }
4193
4194 /// Instruction iterators...
4199
4200 //===--------------------------------------------------------------------===//
4201 /// Recipe iterator methods
4202 ///
4203 inline iterator begin() { return Recipes.begin(); }
4204 inline const_iterator begin() const { return Recipes.begin(); }
4205 inline iterator end() { return Recipes.end(); }
4206 inline const_iterator end() const { return Recipes.end(); }
4207
4208 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4209 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4210 inline reverse_iterator rend() { return Recipes.rend(); }
4211 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4212
4213 inline size_t size() const { return Recipes.size(); }
4214 inline bool empty() const { return Recipes.empty(); }
4215 inline const VPRecipeBase &front() const { return Recipes.front(); }
4216 inline VPRecipeBase &front() { return Recipes.front(); }
4217 inline const VPRecipeBase &back() const { return Recipes.back(); }
4218 inline VPRecipeBase &back() { return Recipes.back(); }
4219
4220 /// Returns a reference to the list of recipes.
4222
4223 /// Returns a pointer to a member of the recipe list.
4224 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4225 return &VPBasicBlock::Recipes;
4226 }
4227
4228 /// Method to support type inquiry through isa, cast, and dyn_cast.
4229 static inline bool classof(const VPBlockBase *V) {
4230 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4231 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4232 }
4233
4234 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4235 assert(Recipe && "No recipe to append.");
4236 assert(!Recipe->Parent && "Recipe already in VPlan");
4237 Recipe->Parent = this;
4238 Recipes.insert(InsertPt, Recipe);
4239 }
4240
4241 /// Augment the existing recipes of a VPBasicBlock with an additional
4242 /// \p Recipe as the last recipe.
4243 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4244
4245 /// The method which generates the output IR instructions that correspond to
4246 /// this VPBasicBlock, thereby "executing" the VPlan.
4247 void execute(VPTransformState *State) override;
4248
4249 /// Return the cost of this VPBasicBlock.
4250 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4251
4252 /// Return the position of the first non-phi node recipe in the block.
4253 iterator getFirstNonPhi();
4254
4255 /// Returns an iterator range over the PHI-like recipes in the block.
4259
4260 /// Split current block at \p SplitAt by inserting a new block between the
4261 /// current block and its successors and moving all recipes starting at
4262 /// SplitAt to the new block. Returns the new block.
4263 VPBasicBlock *splitAt(iterator SplitAt);
4264
4265 VPRegionBlock *getEnclosingLoopRegion();
4266 const VPRegionBlock *getEnclosingLoopRegion() const;
4267
4268#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4269 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4270 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4271 ///
4272 /// Note that the numbering is applied to the whole VPlan, so printing
4273 /// individual blocks is consistent with the whole VPlan printing.
4274 void print(raw_ostream &O, const Twine &Indent,
4275 VPSlotTracker &SlotTracker) const override;
4276 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4277#endif
4278
4279 /// If the block has multiple successors, return the branch recipe terminating
4280 /// the block. If there is no successor or only a single one, return nullptr.
4281 VPRecipeBase *getTerminator();
4282 const VPRecipeBase *getTerminator() const;
4283
4284 /// Returns true if the block is exiting its parent region.
4285 bool isExiting() const;
4286
4287 /// Clone the current block and its recipes, without updating the operands of
4288 /// the cloned recipes.
4289 VPBasicBlock *clone() override;
4290
4291 /// Returns the predecessor block at index \p Idx with the predecessors as per
4292 /// the corresponding plain CFG. If the block is an entry block to a region,
4293 /// the first predecessor is the single predecessor of a region, and the
4294 /// second predecessor is the exiting block of the region.
4295 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4296
4297protected:
4298 /// Execute the recipes in the IR basic block \p BB.
4299 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4300
4301 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4302 /// generated for this VPBB.
4303 void connectToPredecessors(VPTransformState &State);
4304
4305private:
4306 /// Create an IR BasicBlock to hold the output instructions generated by this
4307 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4308 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4309};
4310
4311inline const VPBasicBlock *
4313 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4314}
4315
4316/// A special type of VPBasicBlock that wraps an existing IR basic block.
4317/// Recipes of the block get added before the first non-phi instruction in the
4318/// wrapped block.
4319/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4320/// preheader block.
4321class VPIRBasicBlock : public VPBasicBlock {
4322 friend class VPlan;
4323
4324 BasicBlock *IRBB;
4325
4326 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4327 VPIRBasicBlock(BasicBlock *IRBB)
4328 : VPBasicBlock(VPIRBasicBlockSC,
4329 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4330 IRBB(IRBB) {}
4331
4332public:
4333 ~VPIRBasicBlock() override = default;
4334
4335 static inline bool classof(const VPBlockBase *V) {
4336 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4337 }
4338
4339 /// The method which generates the output IR instructions that correspond to
4340 /// this VPBasicBlock, thereby "executing" the VPlan.
4341 void execute(VPTransformState *State) override;
4342
4343 VPIRBasicBlock *clone() override;
4344
4345 BasicBlock *getIRBasicBlock() const { return IRBB; }
4346};
4347
4348/// Track information about the canonical IV value of a region.
4349/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4351 /// VPRegionValue for the canonical IV, whose allocation is managed by
4352 /// VPCanonicalIVInfo.
4353 std::unique_ptr<VPRegionValue> CanIV;
4354
4355 /// Whether the increment of the canonical IV may unsigned wrap or not.
4356 bool HasNUW = true;
4357
4358public:
4360 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4361
4362 VPRegionValue *getRegionValue() { return CanIV.get(); }
4363 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4364
4365 bool hasNUW() const { return HasNUW; }
4366
4367 void clearNUW() { HasNUW = false; }
4368};
4369
4370/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4371/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4372/// A VPRegionBlock may indicate that its contents are to be replicated several
4373/// times. This is designed to support predicated scalarization, in which a
4374/// scalar if-then code structure needs to be generated VF * UF times. Having
4375/// this replication indicator helps to keep a single model for multiple
4376/// candidate VF's. The actual replication takes place only once the desired VF
4377/// and UF have been determined.
4378class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4379 friend class VPlan;
4380
4381 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4382 VPBlockBase *Entry;
4383
4384 /// Hold the Single Exiting block of the SESE region modelled by the
4385 /// VPRegionBlock.
4386 VPBlockBase *Exiting;
4387
4388 /// Holds the Canonical IV of the loop region along with additional
4389 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4390 /// Loop regions retain their canonical IVs until they are dissolved, even if
4391 /// the canonical IV has no users.
4392 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4393
4394 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4395 /// VPRegionBlocks.
4396 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4397 const std::string &Name = "")
4398 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4399 if (Entry) {
4400 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4401 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4402 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4403 Entry->setParent(this);
4404 Exiting->setParent(this);
4405 }
4406 }
4407
4408 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4409 VPBlockBase *Exiting, const std::string &Name = "")
4410 : VPRegionBlock(Entry, Exiting, Name) {
4411 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4412 }
4413
4414public:
4415 ~VPRegionBlock() override = default;
4416
4417 /// Method to support type inquiry through isa, cast, and dyn_cast.
4418 static inline bool classof(const VPBlockBase *V) {
4419 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4420 }
4421
4422 const VPBlockBase *getEntry() const { return Entry; }
4423 VPBlockBase *getEntry() { return Entry; }
4424
4425 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4426 /// EntryBlock must have no predecessors.
4427 void setEntry(VPBlockBase *EntryBlock) {
4428 assert(!EntryBlock->hasPredecessors() &&
4429 "Entry block cannot have predecessors.");
4430 Entry = EntryBlock;
4431 EntryBlock->setParent(this);
4432 }
4433
4434 const VPBlockBase *getExiting() const { return Exiting; }
4435 VPBlockBase *getExiting() { return Exiting; }
4436
4437 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4438 /// ExitingBlock must have no successors.
4439 void setExiting(VPBlockBase *ExitingBlock) {
4440 assert(!ExitingBlock->hasSuccessors() &&
4441 "Exit block cannot have successors.");
4442 Exiting = ExitingBlock;
4443 ExitingBlock->setParent(this);
4444 }
4445
4446 /// Returns the pre-header VPBasicBlock of the loop region.
4448 assert(!isReplicator() && "should only get pre-header of loop regions");
4449 return getSinglePredecessor()->getExitingBasicBlock();
4450 }
4451
4452 /// An indicator whether this region is to generate multiple replicated
4453 /// instances of output IR corresponding to its VPBlockBases.
4454 bool isReplicator() const { return !CanIVInfo; }
4455
4456 /// The method which generates the output IR instructions that correspond to
4457 /// this VPRegionBlock, thereby "executing" the VPlan.
4458 void execute(VPTransformState *State) override;
4459
4460 /// Return the cost of this region.
4461 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4462
4463#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4464 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4465 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4466 /// consecutive numbers.
4467 ///
4468 /// Note that the numbering is applied to the whole VPlan, so printing
4469 /// individual regions is consistent with the whole VPlan printing.
4470 void print(raw_ostream &O, const Twine &Indent,
4471 VPSlotTracker &SlotTracker) const override;
4472 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4473#endif
4474
4475 /// Clone all blocks in the single-entry single-exit region of the block and
4476 /// their recipes without updating the operands of the cloned recipes.
4477 VPRegionBlock *clone() override;
4478
4479 /// Remove the current region from its VPlan, connecting its predecessor to
4480 /// its entry, and its exiting block to its successor.
4481 void dissolveToCFGLoop();
4482
4483 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4484 /// a new increment before the terminator and return it. The canonical IV
4485 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4486 VPInstruction *getOrCreateCanonicalIVIncrement();
4487
4488 /// Return the canonical induction variable of the region, null for
4489 /// replicating regions.
4491 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4492 }
4494 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4495 }
4496
4497 /// Return the type of the canonical IV for loop regions.
4499 return CanIVInfo->getRegionValue()->getType();
4500 }
4501
4502 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4503 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4504
4505 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4507 assert(Increment && "Must provide increment to clear");
4508 Increment->dropPoisonGeneratingFlags();
4509 CanIVInfo->clearNUW();
4510 }
4511};
4512
4514 return getParent()->getParent();
4515}
4516
4518 return getParent()->getParent();
4519}
4520
4521 /// VPlan models a candidate for vectorization, encoding various decisions taken
4522/// to produce efficient output IR, including which branches, basic-blocks and
4523/// output IR instructions to generate, and their cost. VPlan holds a
4524/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4525/// VPBasicBlock.
4526class VPlan {
4527 friend class VPlanPrinter;
4528 friend class VPSlotTracker;
4529
4530 /// VPBasicBlock corresponding to the original preheader. Used to place
4531 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4532 /// rest of VPlan execution.
4533 /// When this VPlan is used for the epilogue vector loop, the entry will be
4534 /// replaced by a new entry block created during skeleton creation.
4535 VPBasicBlock *Entry;
4536
4537 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4538 VPIRBasicBlock *ScalarHeader;
4539
4540 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4541 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4542 /// e.g. if the scalar epilogue always executes.
4544
4545 /// Holds the VFs applicable to this VPlan.
4547
4548 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4549 /// any UF.
4551
4552 /// Holds the name of the VPlan, for printing.
4553 std::string Name;
4554
4555 /// Represents the trip count of the original loop, for folding
4556 /// the tail.
4557 VPValue *TripCount = nullptr;
4558
4559 /// Represents the backedge taken count of the original loop, for folding
4560 /// the tail. It equals TripCount - 1.
4561 VPSymbolicValue *BackedgeTakenCount = nullptr;
4562
4563 /// Represents the vector trip count.
4564 VPSymbolicValue VectorTripCount;
4565
4566 /// Represents the vectorization factor of the loop.
4567 VPSymbolicValue VF;
4568
4569 /// Represents the unroll factor of the loop.
4570 VPSymbolicValue UF;
4571
4572 /// Represents the loop-invariant VF * UF of the vector loop region.
4573 VPSymbolicValue VFxUF;
4574
4575 /// Contains all the external definitions created for this VPlan, as a mapping
4576 /// from IR Values to VPIRValues.
4578
4579 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4580 /// VPlan is destroyed.
4581 SmallVector<VPBlockBase *> CreatedBlocks;
4582
4583 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4584 /// wrapping the original header of the scalar loop.
4585 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4586 : Entry(Entry), ScalarHeader(ScalarHeader) {
4587 Entry->setPlan(this);
4588 assert(ScalarHeader->getNumSuccessors() == 0 &&
4589 "scalar header must be a leaf node");
4590 }
4591
4592public:
4593 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4594 /// original preheader and scalar header of \p L, to be used as entry and
4595 /// scalar header blocks of the new VPlan.
4596 VPlan(Loop *L);
4597
4598 /// Construct a VPlan with a new VPBasicBlock as entry and a VPIRBasicBlock
4599 /// wrapping \p ScalarHeaderBB. The trip count is not set by this constructor.
4600 VPlan(BasicBlock *ScalarHeaderBB) {
4601 setEntry(createVPBasicBlock("preheader"));
4602 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4603 }
4604
4606
4608 Entry = VPBB;
4609 VPBB->setPlan(this);
4610 }
4611
4612 /// Generate the IR code for this VPlan.
4613 void execute(VPTransformState *State);
4614
4615 /// Return the cost of this plan.
4617
4618 VPBasicBlock *getEntry() { return Entry; }
4619 const VPBasicBlock *getEntry() const { return Entry; }
4620
4621 /// Returns the preheader of the vector loop region, if one exists, or null
4622 /// otherwise.
4624 VPRegionBlock *VectorRegion = getVectorLoopRegion();
4625 return VectorRegion
4626 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4627 : nullptr;
4628 }
4629
4630 /// Returns the VPRegionBlock of the vector loop.
4633
4634 /// Returns the 'middle' block of the plan, that is the block that selects
4635 /// whether to execute the scalar tail loop or the exit block from the loop
4636 /// latch. If there is an early exit from the vector loop, the middle block
4637 /// conceptually has the early exit block as third successor, split across 2
4638 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4639 /// tail loop or the exit block. If the scalar tail loop or exit block are
4640 /// known to always execute, the middle block may branch directly to that
4641 /// block. This function cannot be called once the vector loop region has been
4642 /// removed.
4644 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4645 assert(
4646 LoopRegion &&
4647 "cannot call the function after vector loop region has been removed");
4648 // The middle block is always the last successor of the region.
4649 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4650 }
4651
4653 return const_cast<VPlan *>(this)->getMiddleBlock();
4654 }
4655
4656 /// Return the VPBasicBlock for the preheader of the scalar loop.
4659 getScalarHeader()->getSinglePredecessor());
4660 }
4661
4662 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4663 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4664
4665 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4666 /// the original scalar loop.
4667 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4668
4669 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4670 /// exit block.
4672
4673 /// Returns true if \p VPBB is an exit block.
4674 bool isExitBlock(VPBlockBase *VPBB);
4675
4676 /// The trip count of the original loop.
4678 assert(TripCount && "trip count needs to be set before accessing it");
4679 return TripCount;
4680 }
4681
4682 /// Set the trip count assuming it is currently null; if it is not - use
4683 /// resetTripCount().
4684 void setTripCount(VPValue *NewTripCount) {
4685 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4686 TripCount = NewTripCount;
4687 }
4688
4689 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4690 /// the original trip count have been replaced.
4691 void resetTripCount(VPValue *NewTripCount) {
4692 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4693 "TripCount must be set when resetting");
4694 TripCount = NewTripCount;
4695 }
4696
4697 /// The backedge taken count of the original loop.
4699 if (!BackedgeTakenCount)
4700 BackedgeTakenCount = new VPSymbolicValue();
4701 return BackedgeTakenCount;
4702 }
4703 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4704
4705 /// The vector trip count.
4706 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4707
4708 /// Returns the VF of the vector loop region.
4709 VPSymbolicValue &getVF() { return VF; };
4710 const VPSymbolicValue &getVF() const { return VF; };
4711
4712 /// Returns the UF of the vector loop region.
4713 VPSymbolicValue &getUF() { return UF; };
4714
4715 /// Returns VF * UF of the vector loop region.
4716 VPSymbolicValue &getVFxUF() { return VFxUF; }
4717
4720 }
4721
4722 const DataLayout &getDataLayout() const {
4724 }
4725
4726 void addVF(ElementCount VF) { VFs.insert(VF); }
4727
4729 assert(hasVF(VF) && "Cannot set VF not already in plan");
4730 VFs.clear();
4731 VFs.insert(VF);
4732 }
4733
4734 /// Remove \p VF from the plan.
4736 assert(hasVF(VF) && "tried to remove VF not present in plan");
4737 VFs.remove(VF);
4738 }
4739
4740 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4741 bool hasScalableVF() const {
4742 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4743 }
4744
4745 /// Returns an iterator range over all VFs of the plan.
4748 return VFs;
4749 }
4750
4751 /// Returns the single VF of the plan, asserting that the plan has exactly
4752 /// one VF.
4754 assert(VFs.size() == 1 && "expected plan with single VF");
4755 return VFs[0];
4756 }
4757
4758 bool hasScalarVFOnly() const {
4759 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4760 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4761 "Plan with scalar VF should only have a single VF");
4762 return HasScalarVFOnly;
4763 }
4764
4765 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4766
4767 /// Returns the concrete UF of the plan, after unrolling.
4768 unsigned getConcreteUF() const {
4769 assert(UFs.size() == 1 && "Expected a single UF");
4770 return UFs[0];
4771 }
4772
4773 void setUF(unsigned UF) {
4774 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4775 UFs.clear();
4776 UFs.insert(UF);
4777 }
4778
4779 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4780 /// concrete UF.
4781 bool isUnrolled() const { return UFs.size() == 1; }
4782
4783 /// Return a string with the name of the plan and the applicable VFs and UFs.
4784 std::string getName() const;
4785
4786 void setName(const Twine &newName) { Name = newName.str(); }
4787
4788 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4789 /// yet) for \p V.
4791 assert(V && "Trying to get or add the VPIRValue of a null Value");
4792 auto [It, Inserted] = LiveIns.try_emplace(V);
4793 if (Inserted) {
4794 if (auto *CI = dyn_cast<ConstantInt>(V))
4795 It->second = new VPConstantInt(CI);
4796 else
4797 It->second = new VPIRValue(V);
4798 }
4799
4800 assert(isa<VPIRValue>(It->second) &&
4801 "Only VPIRValues should be in mapping");
4802 return It->second;
4803 }
4805 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4806 return getOrAddLiveIn(V->getValue());
4807 }
4808
4809 /// Return a VPIRValue wrapping i1 true.
4810 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4811
4812 /// Return a VPIRValue wrapping i1 false.
4813 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4814
4815 /// Return a VPIRValue wrapping the null value of type \p Ty.
4816 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4817
4818 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4820 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4821 }
4822
4823 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4824 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4825 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4826 }
4827
4828 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4829 /// value.
4831 bool IsSigned = false) {
4832 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4833 }
4834
4835 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4837 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4838 }
4839
4840 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4841 /// otherwise.
4842 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4843
4844 /// Return the list of live-in VPValues available in the VPlan.
4845 auto getLiveIns() const { return LiveIns.values(); }
4846
4847#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4848 /// Print the live-ins of this VPlan to \p O.
4849 void printLiveIns(raw_ostream &O) const;
4850
4851 /// Print this VPlan to \p O.
4852 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4853
4854 /// Print this VPlan in DOT format to \p O.
4855 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4856
4857 /// Dump the plan to stderr (for debugging).
4858 LLVM_DUMP_METHOD void dump() const;
4859#endif
4860
4861 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4862 /// recipes to refer to the clones, and return it.
4864
4865 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4866 /// present. The returned block is owned by the VPlan and deleted once the
4867 /// VPlan is destroyed.
4869 VPRecipeBase *Recipe = nullptr) {
4870 auto *VPB = new VPBasicBlock(Name, Recipe);
4871 CreatedBlocks.push_back(VPB);
4872 return VPB;
4873 }
4874
4875 /// Create a new loop region with a canonical IV using \p CanIVTy and
4876 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
4877 /// to \p Entry and \p Exiting respectively, if provided. The returned block
4878 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4880 const std::string &Name = "",
4881 VPBlockBase *Entry = nullptr,
4882 VPBlockBase *Exiting = nullptr) {
4883 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
4884 CreatedBlocks.push_back(VPB);
4885 return VPB;
4886 }
4887
4888 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4889 /// returned block is owned by the VPlan and deleted once the VPlan is
4890 /// destroyed.
4892 const std::string &Name = "") {
4893 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
4894 CreatedBlocks.push_back(VPB);
4895 return VPB;
4896 }
4897
4898 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4899 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4900 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4902
4903 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4904 /// instructions in \p IRBB, except its terminator which is managed by the
4905 /// successors of the block in VPlan. The returned block is owned by the VPlan
4906 /// and deleted once the VPlan is destroyed.
4908
4909 /// Returns true if the VPlan is based on a loop with an early exit. That is
4910 /// the case if the VPlan has either more than one exit block or a single exit
4911 /// block with multiple predecessors (one for the exit via the latch and one
4912 /// via the other early exit).
4913 bool hasEarlyExit() const {
4914 return count_if(ExitBlocks,
4915 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4916 1 ||
4917 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4918 }
4919
4920 /// Returns true if the scalar tail may execute after the vector loop, i.e.
4921 /// if the middle block is a predecessor of the scalar preheader. Note that
4922 /// this relies on unneeded branches to the scalar tail loop being removed.
4923 bool hasScalarTail() const {
4924 auto *ScalarPH = getScalarPreheader();
4925 return ScalarPH &&
4926 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
4927 }
4928};
4929
4930#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4931inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4932 Plan.print(OS);
4933 return OS;
4934}
4935#endif
4936
4937} // end namespace llvm
4938
4939#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:587
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
str - Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3809
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3803
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4168
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4196
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4243
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4198
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4195
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4221
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4179
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4185
iterator end()
Definition VPlan.h:4205
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4203
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4197
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4256
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:786
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
~VPBasicBlock() override
Definition VPlan.h:4189
const_reverse_iterator rbegin() const
Definition VPlan.h:4209
reverse_iterator rend()
Definition VPlan.h:4210
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4183
VPRecipeBase & back()
Definition VPlan.h:4218
const VPRecipeBase & front() const
Definition VPlan.h:4215
const_iterator begin() const
Definition VPlan.h:4204
VPRecipeBase & front()
Definition VPlan.h:4216
const VPRecipeBase & back() const
Definition VPlan.h:4217
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4234
bool empty() const
Definition VPlan.h:4214
const_iterator end() const
Definition VPlan.h:4206
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4229
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4224
reverse_iterator rbegin()
Definition VPlan.h:4208
friend class VPlan
Definition VPlan.h:4169
size_t size() const
Definition VPlan.h:4213
const_reverse_iterator rend() const
Definition VPlan.h:4211
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2819
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2824
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2794
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2814
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2835
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2801
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2830
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2810
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:98
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as a successor of this VPBlockBase.
Definition VPlan.h:319
VPRegionBlock * getParent()
Definition VPlan.h:190
VPBlocksTy & getPredecessors()
Definition VPlan.h:227
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:224
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:389
void setName(const Twine &newName)
Definition VPlan.h:183
size_t getNumSuccessors() const
Definition VPlan.h:241
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:223
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:221
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:341
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:666
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:177
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:277
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:354
size_t getNumPredecessors() const
Definition VPlan.h:242
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:310
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:224
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:347
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:219
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:226
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:175
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:196
const VPRegionBlock * getParent() const
Definition VPlan.h:191
const std::string & getName() const
Definition VPlan.h:181
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:329
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:267
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:301
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:237
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:261
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:326
friend class VPBlockUtils
Definition VPlan.h:99
unsigned getVPBlockID() const
Definition VPlan.h:188
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:368
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:333
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:167
VPBlocksTy & getSuccessors()
Definition VPlan.h:216
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:216
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:290
void setParent(VPRegionBlock *P)
Definition VPlan.h:201
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:283
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:231
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:215
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3306
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3290
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3314
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3287
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4362
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4359
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4363
bool hasNUW() const
Definition VPlan.h:4365
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3841
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3835
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3853
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3847
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3860
~VPCurrentIterationPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPIRValue * getStartValue() const
Definition VPlan.h:3963
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3955
VPValue * getStepValue() const
Definition VPlan.h:3964
Type * getScalarType() const
Definition VPlan.h:3961
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3943
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3928
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3967
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition VPlan.h:3935
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3778
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3783
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3769
const SCEV * getSCEV() const
Definition VPlan.h:3789
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3774
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3441
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3423
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3405
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3393
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3379
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3371
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3375
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3435
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3373
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2302
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2306
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2319
static bool classof(const VPValue *V)
Definition VPlan.h:2316
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2342
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2347
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2331
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2339
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2312
VPValue * getStartValue() const
Definition VPlan.h:2334
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2351
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2057
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2074
unsigned getOpcode() const
Definition VPlan.h:2070
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2050
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4321
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:461
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4345
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4335
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4322
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:486
Class to record and manage LLVM IR flags.
Definition VPlan.h:688
FastMathFlagsTy FMFs
Definition VPlan.h:776
ReductionFlagsTy ReductionFlags
Definition VPlan.h:778
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1031
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:869
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:849
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:835
WrapFlagsTy WrapFlags
Definition VPlan.h:770
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:828
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:993
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1057
TruncFlagsTy TruncFlags
Definition VPlan.h:771
CmpInst::Predicate getPredicate() const
Definition VPlan.h:965
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1041
uint8_t AllFlags[2]
Definition VPlan.h:779
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1001
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:874
ExactFlagsTy ExactFlags
Definition VPlan.h:773
bool hasNoSignedWrap() const
Definition VPlan.h:1020
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1045
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:840
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:845
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:854
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:823
uint8_t GEPFlagsStorage
Definition VPlan.h:774
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:859
bool isNonNeg() const
Definition VPlan.h:1003
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:983
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:988
DisjointFlagsTy DisjointFlags
Definition VPlan.h:772
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:973
bool hasNoUnsignedWrap() const
Definition VPlan.h:1009
FCmpFlagsTy FCmpFlags
Definition VPlan.h:777
NonNegFlagsTy NonNegFlags
Definition VPlan.h:775
bool isReductionInLoop() const
Definition VPlan.h:1063
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:885
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:922
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:864
uint8_t CmpPredStorage
Definition VPlan.h:769
RecurKind getRecurKind() const
Definition VPlan.h:1051
VPIRFlags(Instruction &I)
Definition VPlan.h:785
Instruction & getInstruction() const
Definition VPlan.h:1716
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1724
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1703
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1730
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1718
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1691
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1168
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1204
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1176
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1188
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1521
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1563
static bool classof(const VPUser *R)
Definition VPlan.h:1548
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1529
Type * getResultType() const
Definition VPlan.h:1569
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1552
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1223
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1453
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1473
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1394
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1333
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1324
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1340
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1314
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1327
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1267
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1318
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1262
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1259
@ VScale
Returns the value for vscale.
Definition VPlan.h:1336
@ CanonicalIVIncrementForPart
Definition VPlan.h:1243
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1270
bool hasResult() const
Definition VPlan.h:1418
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1476
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1458
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1498
unsigned getOpcode() const
Definition VPlan.h:1402
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1501
friend class VPlanSlp
Definition VPlan.h:1224
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1467
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1443
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2924
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2930
static bool classof(const VPUser *U)
Definition VPlan.h:2906
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2872
Instruction * getInsertPos() const
Definition VPlan.h:2928
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2901
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2926
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2918
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2947
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2912
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2999
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3027
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3021
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3034
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3014
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3001
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
Definition VPlan.h:2957
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2984
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2967
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2978
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2959
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1581
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1610
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1605
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4312
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1630
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1590
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1615
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1619
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3498
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3480
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3491
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3476
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:406
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:549
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4513
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:560
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:480
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:554
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:529
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:408
const VPBasicBlock * getParent() const
Definition VPlan.h:481
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:534
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:424
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:526
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:470
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:303
LLVM_ABI_FOR_TEST VPRecipeValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:143
friend class VPValue
Definition VPlanValue.h:304
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3182
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3161
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3185
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3172
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2755
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2741
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2720
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2734
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2767
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2749
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2708
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2758
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2772
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2764
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2752
A recipe to represent in-loop, ordered or partial reduction operations.
Definition VPlan.h:3050
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3059
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3124
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3093
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3108
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3135
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3137
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3120
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3073
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3122
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3079
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3126
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3133
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3128
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3087
static bool classof(const VPUser *U)
Definition VPlan.h:3098
static bool classof(const VPValue *VPV)
Definition VPlan.h:3103
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3142
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4378
const VPBlockBase * getEntry() const
Definition VPlan.h:4422
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4454
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4439
VPBlockBase * getExiting()
Definition VPlan.h:4435
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4493
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4427
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4498
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4503
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4506
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4490
const VPBlockBase * getExiting() const
Definition VPlan.h:4434
VPBlockBase * getEntry()
Definition VPlan.h:4423
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4447
friend class VPlan
Definition VPlan.h:4379
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4418
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:209
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3204
bool isSingleScalar() const
Definition VPlan.h:3245
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3212
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3257
bool isPredicated() const
Definition VPlan.h:3247
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3226
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3250
unsigned getOpcode() const
Definition VPlan.h:3274
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3269
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4059
VPValue * getStepValue() const
Definition VPlan.h:4032
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4026
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3999
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4045
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4011
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4040
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4036
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3992
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4053
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:606
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:612
static bool classof(const VPValue *V)
Definition VPlan.h:660
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:673
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:616
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:676
static bool classof(const VPUser *U)
Definition VPlan.h:665
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:608
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1156
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:329
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1544
operand_range operands()
Definition VPlanValue.h:397
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:373
unsigned getNumOperands() const
Definition VPlanValue.h:367
operand_iterator op_end()
Definition VPlanValue.h:395
operand_iterator op_begin()
Definition VPlanValue.h:393
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:368
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:348
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:391
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:390
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:137
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
friend class VPRecipeValue
Definition VPlanValue.h:52
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:202
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2183
VPValue * getVFValue() const
Definition VPlan.h:2172
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2169
int64_t getStride() const
Definition VPlan.h:2170
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2204
VPValue * getOffset() const
Definition VPlan.h:2173
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2197
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2159
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2190
VPValue * getPointer() const
Definition VPlan.h:2171
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2241
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2243
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2250
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2228
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2266
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2257
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1989
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1996
const_operand_range args() const
Definition VPlan.h:2030
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2011
operand_range args()
Definition VPlan.h:2029
Function * getCalledScalarFunction() const
Definition VPlan.h:2025
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV)
Definition VPlan.h:3878
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:3895
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:3902
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3883
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1837
Instruction::CastOps getOpcode() const
Definition VPlan.h:1875
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1878
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1845
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1860
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2121
Type * getSourceElementType() const
Definition VPlan.h:2126
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2129
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2113
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2099
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2434
static bool classof(const VPValue *V)
Definition VPlan.h:2382
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2401
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2419
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2394
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2409
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2412
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2370
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2397
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2417
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2426
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2377
const VPValue * getVFValue() const
Definition VPlan.h:2404
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2387
const VPValue * getStepValue() const
Definition VPlan.h:2398
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2495
const TruncInst * getTruncInst() const
Definition VPlan.h:2511
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2489
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2499
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2481
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2455
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2510
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2464
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2526
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2506
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2519
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1889
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1920
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1960
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1969
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1906
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1975
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1941
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1972
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1963
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3526
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3561
static bool classof(const VPUser *U)
Definition VPlan.h:3555
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3580
Instruction & Ingredient
Definition VPlan.h:3517
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3544
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3536
Instruction & getIngredient() const
Definition VPlan.h:3588
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3523
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3548
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3571
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3520
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3567
void setMask(VPValue *Mask)
Definition VPlan.h:3528
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3577
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3564
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2626
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition VPlan.h:2591
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2599
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2553
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2562
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2543
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1781
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1801
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1828
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1785
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1793
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1818
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition VPlanSLP.h:74
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4526
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4842
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1196
friend class VPSlotTracker
Definition VPlan.h:4528
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1172
bool hasVF(ElementCount VF) const
Definition VPlan.h:4740
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4753
const DataLayout & getDataLayout() const
Definition VPlan.h:4722
LLVMContext & getContext() const
Definition VPlan.h:4718
VPBasicBlock * getEntry()
Definition VPlan.h:4618
void setName(const Twine &newName)
Definition VPlan.h:4786
bool hasScalableVF() const
Definition VPlan.h:4741
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4677
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4698
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4747
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:932
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:907
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4804
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:940
const VPBasicBlock * getEntry() const
Definition VPlan.h:4619
friend class VPlanPrinter
Definition VPlan.h:4527
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4813
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4836
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4716
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4819
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4891
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1329
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4845
bool hasUF(unsigned UF) const
Definition VPlan.h:4765
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4667
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4706
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4703
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4790
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:4879
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4816
void setVF(ElementCount VF)
Definition VPlan.h:4728
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4781
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1095
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4913
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1077
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4768
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4830
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4652
void setTripCount(VPValue *NewTripCount)
Set the trip count, assuming it is currently null; if it is not null, use resetTripCount() instead.
Definition VPlan.h:4684
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4691
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4643
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4607
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4868
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1335
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4735
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4810
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1202
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4713
bool hasScalarVFOnly() const
Definition VPlan.h:4758
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4657
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:950
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1155
void addVF(ElementCount VF)
Definition VPlan.h:4726
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4663
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1111
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4623
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4709
void setUF(unsigned UF)
Definition VPlan.h:4773
const VPSymbolicValue & getVF() const
Definition VPlan.h:4710
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:4923
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1243
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4600
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4824
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2507
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:831
LLVM_PACKED_END
Definition VPlan.h:1106
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:841
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2681
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:366
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:83
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:93
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2679
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:78
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
static VPIRMetadata * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4124
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4114
static VPIRMetadata * castFailed()
Used by inherited doCastIfPossible to dyn_cast.
Definition VPlan.h:4152
static VPPhiAccessors * castFailed()
Used by inherited doCastIfPossible to dyn_cast.
Definition VPlan.h:4095
static VPPhiAccessors * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4081
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4075
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2673
Possible variants of a reduction.
Definition VPlan.h:2671
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2676
unsigned VFScaleFactor
Definition VPlan.h:2677
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:258
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2642
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2654
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2633
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:720
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:725
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:715
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:708
PHINode & getIRPhi()
Definition VPlan.h:1762
VPIRPhi(PHINode &PN)
Definition VPlan.h:1750
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1752
static bool classof(const VPUser *U)
Definition VPlan.h:1757
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1773
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:141
static bool classof(const VPUser *U)
Definition VPlan.h:1649
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1645
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1664
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1679
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1659
static bool classof(const VPValue *V)
Definition VPlan.h:1654
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1110
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1143
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1116
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1111
static bool classof(const VPValue *V)
Definition VPlan.h:1136
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1131
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:280
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3634
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3647
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3635
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3657
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3594
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3595
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3614
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3603
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3718
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3730
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3719
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3743
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3733
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3675
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3676
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3693
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3684
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3699