LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// \enum UncountableExitStyle
80/// Different methods of handling early exits.
81///
84 /// No side effects to worry about, so we can process any uncountable exits
85 /// in the loop and branch either to the middle block if the trip count was
/// reached, or an early exit block to determine which exit was taken.
88 /// All memory operations other than the load(s) required to determine whether
/// an uncountable exit occurred will be masked based on that condition. If an
90 /// uncountable exit is taken, then all lanes before the exiting lane will
91 /// complete, leaving just the final lane to execute in the scalar tail.
93};
94
95/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
96/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
98 friend class VPBlockUtils;
99
100 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
101
102 /// An optional name for the block.
103 std::string Name;
104
105 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
106 /// it is a topmost VPBlockBase.
107 VPRegionBlock *Parent = nullptr;
108
109 /// List of predecessor blocks.
111
112 /// List of successor blocks.
114
115 /// VPlan containing the block. Can only be set on the entry block of the
116 /// plan.
117 VPlan *Plan = nullptr;
118
119 /// Add \p Successor as the last successor to this block.
120 void appendSuccessor(VPBlockBase *Successor) {
121 assert(Successor && "Cannot add nullptr successor!");
122 Successors.push_back(Successor);
123 }
124
125 /// Add \p Predecessor as the last predecessor to this block.
126 void appendPredecessor(VPBlockBase *Predecessor) {
127 assert(Predecessor && "Cannot add nullptr predecessor!");
128 Predecessors.push_back(Predecessor);
129 }
130
131 /// Remove \p Predecessor from the predecessors of this block.
132 void removePredecessor(VPBlockBase *Predecessor) {
133 auto Pos = find(Predecessors, Predecessor);
134 assert(Pos && "Predecessor does not exist");
135 Predecessors.erase(Pos);
136 }
137
138 /// Remove \p Successor from the successors of this block.
139 void removeSuccessor(VPBlockBase *Successor) {
140 auto Pos = find(Successors, Successor);
141 assert(Pos && "Successor does not exist");
142 Successors.erase(Pos);
143 }
144
145 /// This function replaces one predecessor with another, useful when
146 /// trying to replace an old block in the CFG with a new one.
147 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
148 auto I = find(Predecessors, Old);
149 assert(I != Predecessors.end());
150 assert(Old->getParent() == New->getParent() &&
151 "replaced predecessor must have the same parent");
152 *I = New;
153 }
154
155 /// This function replaces one successor with another, useful when
156 /// trying to replace an old block in the CFG with a new one.
157 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
158 auto I = find(Successors, Old);
159 assert(I != Successors.end());
160 assert(Old->getParent() == New->getParent() &&
161 "replaced successor must have the same parent");
162 *I = New;
163 }
164
165protected:
166 VPBlockBase(const unsigned char SC, const std::string &N)
167 : SubclassID(SC), Name(N) {}
168
169public:
170 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
171 /// that are actually instantiated. Values of this enumeration are kept in the
172 /// SubclassID field of the VPBlockBase objects. They are used for concrete
173 /// type identification.
174 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
175
177
178 virtual ~VPBlockBase() = default;
179
180 const std::string &getName() const { return Name; }
181
182 void setName(const Twine &newName) { Name = newName.str(); }
183
184 /// \return an ID for the concrete type of this object.
185 /// This is used to implement the classof checks. This should not be used
186 /// for any other purpose, as the values may change as LLVM evolves.
187 unsigned getVPBlockID() const { return SubclassID; }
188
189 VPRegionBlock *getParent() { return Parent; }
190 const VPRegionBlock *getParent() const { return Parent; }
191
192 /// \return A pointer to the plan containing the current block.
193 VPlan *getPlan();
194 const VPlan *getPlan() const;
195
196 /// Sets the pointer of the plan containing the block. The block must be the
197 /// entry block into the VPlan.
198 void setPlan(VPlan *ParentPlan);
199
200 void setParent(VPRegionBlock *P) { Parent = P; }
201
202 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
203 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
204 /// VPBlockBase is a VPBasicBlock, it is returned.
205 const VPBasicBlock *getEntryBasicBlock() const;
206 VPBasicBlock *getEntryBasicBlock();
207
208 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
209 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
210 /// VPBlockBase is a VPBasicBlock, it is returned.
211 const VPBasicBlock *getExitingBasicBlock() const;
212 VPBasicBlock *getExitingBasicBlock();
213
214 const VPBlocksTy &getSuccessors() const { return Successors; }
215 VPBlocksTy &getSuccessors() { return Successors; }
216
217 /// Returns true if this block has any successors.
218 bool hasSuccessors() const { return !Successors.empty(); }
219 /// Returns true if this block has any predecessors.
220 bool hasPredecessors() const { return !Predecessors.empty(); }
221
224
225 const VPBlocksTy &getPredecessors() const { return Predecessors; }
226 VPBlocksTy &getPredecessors() { return Predecessors; }
227
228 /// \return the successor of this VPBlockBase if it has a single successor.
229 /// Otherwise return a null pointer.
231 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
232 }
233
234 /// \return the predecessor of this VPBlockBase if it has a single
235 /// predecessor. Otherwise return a null pointer.
237 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
238 }
239
240 size_t getNumSuccessors() const { return Successors.size(); }
241 size_t getNumPredecessors() const { return Predecessors.size(); }
242
243 /// An Enclosing Block of a block B is any block containing B, including B
244 /// itself. \return the closest enclosing block starting from "this", which
245 /// has successors. \return the root enclosing block if all enclosing blocks
246 /// have no successors.
247 VPBlockBase *getEnclosingBlockWithSuccessors();
248
249 /// \return the closest enclosing block starting from "this", which has
250 /// predecessors. \return the root enclosing block if all enclosing blocks
251 /// have no predecessors.
252 VPBlockBase *getEnclosingBlockWithPredecessors();
253
254 /// \return the successors either attached directly to this VPBlockBase or, if
255 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
256 /// successors of its own, search recursively for the first enclosing
257 /// VPRegionBlock that has successors and return them. If no such
258 /// VPRegionBlock exists, return the (empty) successors of the topmost
259 /// VPBlockBase reached.
261 return getEnclosingBlockWithSuccessors()->getSuccessors();
262 }
263
264 /// \return the hierarchical successor of this VPBlockBase if it has a single
265 /// hierarchical successor. Otherwise return a null pointer.
267 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
268 }
269
270 /// \return the predecessors either attached directly to this VPBlockBase or,
271 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
272 /// predecessors of its own, search recursively for the first enclosing
273 /// VPRegionBlock that has predecessors and return them. If no such
274 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
275 /// VPBlockBase reached.
277 return getEnclosingBlockWithPredecessors()->getPredecessors();
278 }
279
280 /// \return the hierarchical predecessor of this VPBlockBase if it has a
281 /// single hierarchical predecessor. Otherwise return a null pointer.
285
286 /// Set a given VPBlockBase \p Successor as the single successor of this
287 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
288 /// This VPBlockBase must have no successors.
290 assert(Successors.empty() && "Setting one successor when others exist.");
291 assert(Successor->getParent() == getParent() &&
292 "connected blocks must have the same parent");
293 appendSuccessor(Successor);
294 }
295
296 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
297 /// successors of this VPBlockBase. This VPBlockBase is not added as
298 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
299 /// successors.
300 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
301 assert(Successors.empty() && "Setting two successors when others exist.");
302 appendSuccessor(IfTrue);
303 appendSuccessor(IfFalse);
304 }
305
306 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
307 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
308 /// as successor of any VPBasicBlock in \p NewPreds.
310 assert(Predecessors.empty() && "Block predecessors already set.");
311 for (auto *Pred : NewPreds)
312 appendPredecessor(Pred);
313 }
314
315 /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase.
316 /// This VPBlockBase must have no successors. This VPBlockBase is not added
317 /// as predecessor of any VPBasicBlock in \p NewSuccs.
319 assert(Successors.empty() && "Block successors already set.");
320 for (auto *Succ : NewSuccs)
321 appendSuccessor(Succ);
322 }
323
/// Remove all the predecessors of this block.
325 void clearPredecessors() { Predecessors.clear(); }
326
327 /// Remove all the successors of this block.
328 void clearSuccessors() { Successors.clear(); }
329
330 /// Swap predecessors of the block. The block must have exactly 2
331 /// predecessors.
333 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
334 std::swap(Predecessors[0], Predecessors[1]);
335 }
336
337 /// Swap successors of the block. The block must have exactly 2 successors.
338 // TODO: This should be part of introducing conditional branch recipes rather
339 // than being independent.
341 assert(Successors.size() == 2 && "must have 2 successors to swap");
342 std::swap(Successors[0], Successors[1]);
343 }
344
345 /// Returns the index for \p Pred in the blocks predecessors list.
346 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
347 assert(count(Predecessors, Pred) == 1 &&
348 "must have Pred exactly once in Predecessors");
349 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
350 }
351
352 /// Returns the index for \p Succ in the blocks successor list.
353 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
354 assert(count(Successors, Succ) == 1 &&
355 "must have Succ exactly once in Successors");
356 return std::distance(Successors.begin(), find(Successors, Succ));
357 }
358
359 /// The method which generates the output IR that correspond to this
360 /// VPBlockBase, thereby "executing" the VPlan.
361 virtual void execute(VPTransformState *State) = 0;
362
363 /// Return the cost of the block.
365
366#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
367 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
368 OS << getName();
369 }
370
371 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
372 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
/// consecutive numbers.
374 ///
375 /// Note that the numbering is applied to the whole VPlan, so printing
376 /// individual blocks is consistent with the whole VPlan printing.
377 virtual void print(raw_ostream &O, const Twine &Indent,
378 VPSlotTracker &SlotTracker) const = 0;
379
380 /// Print plain-text dump of this VPlan to \p O.
381 void print(raw_ostream &O) const;
382
383 /// Print the successors of this block to \p O, prefixing all lines with \p
384 /// Indent.
385 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
386
387 /// Dump this VPBlockBase to dbgs().
388 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
389#endif
390
391 /// Clone the current block and it's recipes without updating the operands of
392 /// the cloned recipes, including all blocks in the single-entry single-exit
393 /// region for VPRegionBlocks.
394 virtual VPBlockBase *clone() = 0;
395};
396
397/// VPRecipeBase is a base class modeling a sequence of one or more output IR
398/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
399/// and is responsible for deleting its defined values. Single-value
400/// recipes must inherit from VPSingleDef instead of inheriting from both
401/// VPRecipeBase and VPValue separately.
403 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
404 public VPDef,
405 public VPUser {
406 friend VPBasicBlock;
407 friend class VPBlockUtils;
408
409 /// Subclass identifier (for isa/dyn_cast).
410 const unsigned char SubclassID;
411
412 /// Each VPRecipe belongs to a single VPBasicBlock.
413 VPBasicBlock *Parent = nullptr;
414
415 /// The debug location for the recipe.
416 DebugLoc DL;
417
418public:
419 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
420 /// that is actually instantiated. Values of this enumeration are kept in the
421 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
422 /// type identification.
423 using VPRecipeTy = enum {
424 VPBranchOnMaskSC,
425 VPDerivedIVSC,
426 VPExpandSCEVSC,
427 VPExpressionSC,
428 VPIRInstructionSC,
429 VPInstructionSC,
430 VPInterleaveEVLSC,
431 VPInterleaveSC,
432 VPReductionEVLSC,
433 VPReductionSC,
434 VPReplicateSC,
435 VPScalarIVStepsSC,
436 VPVectorPointerSC,
437 VPVectorEndPointerSC,
438 VPWidenCallSC,
439 VPWidenCanonicalIVSC,
440 VPWidenCastSC,
441 VPWidenGEPSC,
442 VPWidenIntrinsicSC,
443 VPWidenLoadEVLSC,
444 VPWidenLoadSC,
445 VPWidenStoreEVLSC,
446 VPWidenStoreSC,
447 VPWidenSC,
448 VPBlendSC,
449 VPHistogramSC,
450 // START: Phi-like recipes. Need to be kept together.
451 VPWidenPHISC,
452 VPPredInstPHISC,
453 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
454 // VPHeaderPHIRecipe need to be kept together.
455 VPCurrentIterationPHISC,
456 VPActiveLaneMaskPHISC,
457 VPFirstOrderRecurrencePHISC,
458 VPWidenIntOrFpInductionSC,
459 VPWidenPointerInductionSC,
460 VPReductionPHISC,
461 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
462 // END: Phi-like recipes
463 VPFirstPHISC = VPWidenPHISC,
464 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
465 VPLastHeaderPHISC = VPReductionPHISC,
466 VPLastPHISC = VPReductionPHISC,
467 };
468
469 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
471 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
472
473 ~VPRecipeBase() override = default;
474
475 /// Clone the current recipe.
476 virtual VPRecipeBase *clone() = 0;
477
478 /// \return the VPBasicBlock which this VPRecipe belongs to.
479 VPBasicBlock *getParent() { return Parent; }
480 const VPBasicBlock *getParent() const { return Parent; }
481
482 /// \return the VPRegionBlock which the recipe belongs to.
483 VPRegionBlock *getRegion();
484 const VPRegionBlock *getRegion() const;
485
486 /// The method which generates the output IR instructions that correspond to
487 /// this VPRecipe, thereby "executing" the VPlan.
488 virtual void execute(VPTransformState &State) = 0;
489
490 /// Return the cost of this recipe, taking into account if the cost
491 /// computation should be skipped and the ForceTargetInstructionCost flag.
492 /// Also takes care of printing the cost for debugging.
494
495 /// Insert an unlinked recipe into a basic block immediately before
496 /// the specified recipe.
497 void insertBefore(VPRecipeBase *InsertPos);
498 /// Insert an unlinked recipe into \p BB immediately before the insertion
499 /// point \p IP;
500 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
501
502 /// Insert an unlinked Recipe into a basic block immediately after
503 /// the specified Recipe.
504 void insertAfter(VPRecipeBase *InsertPos);
505
506 /// Unlink this recipe from its current VPBasicBlock and insert it into
507 /// the VPBasicBlock that MovePos lives in, right after MovePos.
508 void moveAfter(VPRecipeBase *MovePos);
509
510 /// Unlink this recipe and insert into BB before I.
511 ///
512 /// \pre I is a valid iterator into BB.
513 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
514
515 /// This method unlinks 'this' from the containing basic block, but does not
516 /// delete it.
517 void removeFromParent();
518
519 /// This method unlinks 'this' from the containing basic block and deletes it.
520 ///
521 /// \returns an iterator pointing to the element after the erased one
523
524 /// \return an ID for the concrete type of this object.
525 unsigned getVPRecipeID() const { return SubclassID; }
526
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPDef *D) {
  // All VPDefs are also VPRecipeBases.
  return true;
}

// All VPUsers are also VPRecipeBases, so type inquiry from a VPUser
// unconditionally succeeds as well.
static inline bool classof(const VPUser *U) { return true; }
534
535 /// Returns true if the recipe may have side-effects.
536 bool mayHaveSideEffects() const;
537
538 /// Returns true for PHI-like recipes.
539 bool isPhi() const;
540
541 /// Returns true if the recipe may read from memory.
542 bool mayReadFromMemory() const;
543
544 /// Returns true if the recipe may write to memory.
545 bool mayWriteToMemory() const;
546
547 /// Returns true if the recipe may read from or write to memory.
548 bool mayReadOrWriteMemory() const {
550 }
551
552 /// Returns the debug location of the recipe.
553 DebugLoc getDebugLoc() const { return DL; }
554
555 /// Return true if the recipe is a scalar cast.
556 bool isScalarCast() const;
557
558 /// Set the recipe's debug location to \p NewDL.
559 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
560
561#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
562 /// Dump the recipe to stderr (for debugging).
563 LLVM_ABI_FOR_TEST void dump() const;
564
565 /// Print the recipe, delegating to printRecipe().
566 void print(raw_ostream &O, const Twine &Indent,
568#endif
569
570protected:
571 /// Compute the cost of this recipe either using a recipe's specialized
572 /// implementation or using the legacy cost model and the underlying
573 /// instructions.
574 virtual InstructionCost computeCost(ElementCount VF,
575 VPCostContext &Ctx) const;
576
577#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
578 /// Each concrete VPRecipe prints itself, without printing common information,
579 /// like debug info or metadata.
580 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
581 VPSlotTracker &SlotTracker) const = 0;
582#endif
583};
584
585// Helper macro to define common classof implementations for recipes.
586#define VP_CLASSOF_IMPL(VPRecipeID) \
587 static inline bool classof(const VPRecipeBase *R) { \
588 return R->getVPRecipeID() == VPRecipeID; \
589 } \
590 static inline bool classof(const VPValue *V) { \
591 auto *R = V->getDefiningRecipe(); \
592 return R && R->getVPRecipeID() == VPRecipeID; \
593 } \
594 static inline bool classof(const VPUser *U) { \
595 auto *R = dyn_cast<VPRecipeBase>(U); \
596 return R && R->getVPRecipeID() == VPRecipeID; \
597 } \
598 static inline bool classof(const VPSingleDefRecipe *R) { \
599 return R->getVPRecipeID() == VPRecipeID; \
600 }
601
602/// VPSingleDef is a base class for recipes for modeling a sequence of one or
603/// more output IR that define a single result VPValue.
604/// Note that VPRecipeBase must be inherited from before VPValue.
606public:
607 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
609 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
610
611 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
613 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
614
static inline bool classof(const VPRecipeBase *R) {
  // The 'true' cases below are exactly the recipe IDs whose recipes model a
  // single defined result value and hence derive from VPSingleDefRecipe;
  // the 'false' cases define no single result value.
  switch (R->getVPRecipeID()) {
  case VPRecipeBase::VPDerivedIVSC:
  case VPRecipeBase::VPExpandSCEVSC:
  case VPRecipeBase::VPExpressionSC:
  case VPRecipeBase::VPInstructionSC:
  case VPRecipeBase::VPReductionEVLSC:
  case VPRecipeBase::VPReductionSC:
  case VPRecipeBase::VPReplicateSC:
  case VPRecipeBase::VPScalarIVStepsSC:
  case VPRecipeBase::VPVectorPointerSC:
  case VPRecipeBase::VPVectorEndPointerSC:
  case VPRecipeBase::VPWidenCallSC:
  case VPRecipeBase::VPWidenCanonicalIVSC:
  case VPRecipeBase::VPWidenCastSC:
  case VPRecipeBase::VPWidenGEPSC:
  case VPRecipeBase::VPWidenIntrinsicSC:
  case VPRecipeBase::VPWidenSC:
  case VPRecipeBase::VPBlendSC:
  case VPRecipeBase::VPPredInstPHISC:
  case VPRecipeBase::VPCurrentIterationPHISC:
  case VPRecipeBase::VPActiveLaneMaskPHISC:
  case VPRecipeBase::VPFirstOrderRecurrencePHISC:
  case VPRecipeBase::VPWidenPHISC:
  case VPRecipeBase::VPWidenIntOrFpInductionSC:
  case VPRecipeBase::VPWidenPointerInductionSC:
  case VPRecipeBase::VPReductionPHISC:
    return true;
  case VPRecipeBase::VPBranchOnMaskSC:
  case VPRecipeBase::VPInterleaveEVLSC:
  case VPRecipeBase::VPInterleaveSC:
  case VPRecipeBase::VPIRInstructionSC:
  case VPRecipeBase::VPWidenLoadEVLSC:
  case VPRecipeBase::VPWidenLoadSC:
  case VPRecipeBase::VPWidenStoreEVLSC:
  case VPRecipeBase::VPWidenStoreSC:
  case VPRecipeBase::VPHistogramSC:
    // TODO: Widened stores don't define a value, but widened loads do. Split
    // the recipes to be able to make widened loads VPSingleDefRecipes.
    return false;
  }
  // Fully-covered switch over VPRecipeTy; reaching here means a new ID was
  // added without updating this classof.
  llvm_unreachable("Unhandled VPRecipeID");
}
658
659 static inline bool classof(const VPValue *V) {
660 auto *R = V->getDefiningRecipe();
661 return R && classof(R);
662 }
663
664 static inline bool classof(const VPUser *U) {
665 auto *R = dyn_cast<VPRecipeBase>(U);
666 return R && classof(R);
667 }
668
669 VPSingleDefRecipe *clone() override = 0;
670
671 /// Returns the underlying instruction.
678
679#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
680 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
682#endif
683};
684
685/// Class to record and manage LLVM IR flags.
688 enum class OperationType : unsigned char {
689 Cmp,
690 FCmp,
691 OverflowingBinOp,
692 Trunc,
693 DisjointOp,
694 PossiblyExactOp,
695 GEPOp,
696 FPMathOp,
697 NonNegOp,
698 ReductionOp,
699 Other
700 };
701
702public:
703 struct WrapFlagsTy {
704 char HasNUW : 1;
705 char HasNSW : 1;
706
708 };
709
711 char HasNUW : 1;
712 char HasNSW : 1;
713
715 };
716
721
723 char NonNeg : 1;
724 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
725 };
726
727private:
728 struct ExactFlagsTy {
729 char IsExact : 1;
730 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
731 };
732 struct FastMathFlagsTy {
733 char AllowReassoc : 1;
734 char NoNaNs : 1;
735 char NoInfs : 1;
736 char NoSignedZeros : 1;
737 char AllowReciprocal : 1;
738 char AllowContract : 1;
739 char ApproxFunc : 1;
740
741 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
742 };
743 /// Holds both the predicate and fast-math flags for floating-point
744 /// comparisons.
745 struct FCmpFlagsTy {
746 uint8_t CmpPredStorage;
747 FastMathFlagsTy FMFs;
748 };
749 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
750 struct ReductionFlagsTy {
751 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
752 // additional kinds.
753 unsigned char Kind : 6;
754 // TODO: Derive order/in-loop from plan and remove here.
755 unsigned char IsOrdered : 1;
756 unsigned char IsInLoop : 1;
757 FastMathFlagsTy FMFs;
758
759 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
760 FastMathFlags FMFs)
761 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
762 IsInLoop(IsInLoop), FMFs(FMFs) {}
763 };
764
765 OperationType OpType;
766
767 union {
772 ExactFlagsTy ExactFlags;
775 FastMathFlagsTy FMFs;
776 FCmpFlagsTy FCmpFlags;
777 ReductionFlagsTy ReductionFlags;
779 };
780
781public:
782 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
783
785 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
786 OpType = OperationType::FCmp;
788 FCmp->getPredicate());
789 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
790 FCmpFlags.FMFs = FCmp->getFastMathFlags();
791 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
792 OpType = OperationType::Cmp;
794 Op->getPredicate());
795 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
796 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
797 OpType = OperationType::DisjointOp;
798 DisjointFlags.IsDisjoint = Op->isDisjoint();
799 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
800 OpType = OperationType::OverflowingBinOp;
801 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
802 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
803 OpType = OperationType::Trunc;
804 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
805 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
806 OpType = OperationType::PossiblyExactOp;
807 ExactFlags.IsExact = Op->isExact();
808 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
809 OpType = OperationType::GEPOp;
810 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
811 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
812 "wrap flags truncated");
813 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
814 OpType = OperationType::NonNegOp;
815 NonNegFlags.NonNeg = PNNI->hasNonNeg();
816 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
817 OpType = OperationType::FPMathOp;
818 FMFs = Op->getFastMathFlags();
819 }
820 }
821
822 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
824 assert(getPredicate() == Pred && "predicate truncated");
825 }
826
828 : OpType(OperationType::FCmp), AllFlags() {
830 assert(getPredicate() == Pred && "predicate truncated");
831 FCmpFlags.FMFs = FMFs;
832 }
833
835 : OpType(OperationType::OverflowingBinOp), AllFlags() {
836 this->WrapFlags = WrapFlags;
837 }
838
840 : OpType(OperationType::Trunc), AllFlags() {
841 this->TruncFlags = TruncFlags;
842 }
843
844 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
845 this->FMFs = FMFs;
846 }
847
849 : OpType(OperationType::DisjointOp), AllFlags() {
850 this->DisjointFlags = DisjointFlags;
851 }
852
854 : OpType(OperationType::NonNegOp), AllFlags() {
855 this->NonNegFlags = NonNegFlags;
856 }
857
858 VPIRFlags(ExactFlagsTy ExactFlags)
859 : OpType(OperationType::PossiblyExactOp), AllFlags() {
860 this->ExactFlags = ExactFlags;
861 }
862
864 : OpType(OperationType::GEPOp), AllFlags() {
865 GEPFlagsStorage = GEPFlags.getRaw();
866 }
867
868 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
869 : OpType(OperationType::ReductionOp), AllFlags() {
870 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
871 }
872
874 OpType = Other.OpType;
875 AllFlags[0] = Other.AllFlags[0];
876 AllFlags[1] = Other.AllFlags[1];
877 }
878
879 /// Only keep flags also present in \p Other. \p Other must have the same
880 /// OpType as the current object.
881 void intersectFlags(const VPIRFlags &Other);
882
883 /// Drop all poison-generating flags.
885 // NOTE: This needs to be kept in-sync with
886 // Instruction::dropPoisonGeneratingFlags.
887 switch (OpType) {
888 case OperationType::OverflowingBinOp:
889 WrapFlags.HasNUW = false;
890 WrapFlags.HasNSW = false;
891 break;
892 case OperationType::Trunc:
893 TruncFlags.HasNUW = false;
894 TruncFlags.HasNSW = false;
895 break;
896 case OperationType::DisjointOp:
897 DisjointFlags.IsDisjoint = false;
898 break;
899 case OperationType::PossiblyExactOp:
900 ExactFlags.IsExact = false;
901 break;
902 case OperationType::GEPOp:
903 GEPFlagsStorage = 0;
904 break;
905 case OperationType::FPMathOp:
906 case OperationType::FCmp:
907 case OperationType::ReductionOp:
908 getFMFsRef().NoNaNs = false;
909 getFMFsRef().NoInfs = false;
910 break;
911 case OperationType::NonNegOp:
912 NonNegFlags.NonNeg = false;
913 break;
914 case OperationType::Cmp:
915 case OperationType::Other:
916 break;
917 }
918 }
919
920 /// Apply the IR flags to \p I.
921 void applyFlags(Instruction &I) const {
922 switch (OpType) {
923 case OperationType::OverflowingBinOp:
924 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
925 I.setHasNoSignedWrap(WrapFlags.HasNSW);
926 break;
927 case OperationType::Trunc:
928 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
929 I.setHasNoSignedWrap(TruncFlags.HasNSW);
930 break;
931 case OperationType::DisjointOp:
932 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
933 break;
934 case OperationType::PossiblyExactOp:
935 I.setIsExact(ExactFlags.IsExact);
936 break;
937 case OperationType::GEPOp:
938 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
940 break;
941 case OperationType::FPMathOp:
942 case OperationType::FCmp: {
943 const FastMathFlagsTy &F = getFMFsRef();
944 I.setHasAllowReassoc(F.AllowReassoc);
945 I.setHasNoNaNs(F.NoNaNs);
946 I.setHasNoInfs(F.NoInfs);
947 I.setHasNoSignedZeros(F.NoSignedZeros);
948 I.setHasAllowReciprocal(F.AllowReciprocal);
949 I.setHasAllowContract(F.AllowContract);
950 I.setHasApproxFunc(F.ApproxFunc);
951 break;
952 }
953 case OperationType::NonNegOp:
954 I.setNonNeg(NonNegFlags.NonNeg);
955 break;
956 case OperationType::ReductionOp:
957 llvm_unreachable("reduction ops should not use applyFlags");
958 case OperationType::Cmp:
959 case OperationType::Other:
960 break;
961 }
962 }
963
965 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
966 "recipe doesn't have a compare predicate");
967 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
970 }
971
973 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
974 "recipe doesn't have a compare predicate");
975 if (OpType == OperationType::FCmp)
977 else
979 assert(getPredicate() == Pred && "predicate truncated");
980 }
981
985
986 /// Returns true if the recipe has a comparison predicate.
987 bool hasPredicate() const {
988 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
989 }
990
991 /// Returns true if the recipe has fast-math flags.
992 bool hasFastMathFlags() const {
993 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
994 OpType == OperationType::ReductionOp;
995 }
996
998
999 /// Returns true if the recipe has non-negative flag.
1000 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1001
1002 bool isNonNeg() const {
1003 assert(OpType == OperationType::NonNegOp &&
1004 "recipe doesn't have a NNEG flag");
1005 return NonNegFlags.NonNeg;
1006 }
1007
1008 bool hasNoUnsignedWrap() const {
1009 switch (OpType) {
1010 case OperationType::OverflowingBinOp:
1011 return WrapFlags.HasNUW;
1012 case OperationType::Trunc:
1013 return TruncFlags.HasNUW;
1014 default:
1015 llvm_unreachable("recipe doesn't have a NUW flag");
1016 }
1017 }
1018
1019 bool hasNoSignedWrap() const {
1020 switch (OpType) {
1021 case OperationType::OverflowingBinOp:
1022 return WrapFlags.HasNSW;
1023 case OperationType::Trunc:
1024 return TruncFlags.HasNSW;
1025 default:
1026 llvm_unreachable("recipe doesn't have a NSW flag");
1027 }
1028 }
1029
1030 bool hasNoWrapFlags() const {
1031 switch (OpType) {
1032 case OperationType::OverflowingBinOp:
1033 case OperationType::Trunc:
1034 return true;
1035 default:
1036 return false;
1037 }
1038 }
1039
1041 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1042 }
1043
1044 bool isDisjoint() const {
1045 assert(OpType == OperationType::DisjointOp &&
1046 "recipe cannot have a disjoing flag");
1047 return DisjointFlags.IsDisjoint;
1048 }
1049
1051 assert(OpType == OperationType::ReductionOp &&
1052 "recipe doesn't have reduction flags");
1053 return static_cast<RecurKind>(ReductionFlags.Kind);
1054 }
1055
1056 bool isReductionOrdered() const {
1057 assert(OpType == OperationType::ReductionOp &&
1058 "recipe doesn't have reduction flags");
1059 return ReductionFlags.IsOrdered;
1060 }
1061
1062 bool isReductionInLoop() const {
1063 assert(OpType == OperationType::ReductionOp &&
1064 "recipe doesn't have reduction flags");
1065 return ReductionFlags.IsInLoop;
1066 }
1067
1068private:
1069 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1070 FastMathFlagsTy &getFMFsRef() {
1071 if (OpType == OperationType::FCmp)
1072 return FCmpFlags.FMFs;
1073 if (OpType == OperationType::ReductionOp)
1074 return ReductionFlags.FMFs;
1075 return FMFs;
1076 }
1077 const FastMathFlagsTy &getFMFsRef() const {
1078 if (OpType == OperationType::FCmp)
1079 return FCmpFlags.FMFs;
1080 if (OpType == OperationType::ReductionOp)
1081 return ReductionFlags.FMFs;
1082 return FMFs;
1083 }
1084
1085public:
1086 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1087 /// otherwise. Opcodes not supporting default flags include compares and
1088 /// ComputeReductionResult.
1089 static VPIRFlags getDefaultFlags(unsigned Opcode);
1090
1091#if !defined(NDEBUG)
1092 /// Returns true if the set flags are valid for \p Opcode.
1093 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1094
1095 /// Returns true if \p Opcode has its required flags set.
1096 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1097#endif
1098
1099#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1100 void printFlags(raw_ostream &O) const;
1101#endif
1102};
1104
1105static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1106
1107/// A pure-virtual common base class for recipes defining a single VPValue and
1108/// using IR flags.
1110 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1111 const VPIRFlags &Flags,
1113 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1114
1115 static inline bool classof(const VPRecipeBase *R) {
1116 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1117 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1118 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1119 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1120 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1121 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1122 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1123 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1124 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1125 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1126 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1127 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1128 }
1129
1130 static inline bool classof(const VPUser *U) {
1131 auto *R = dyn_cast<VPRecipeBase>(U);
1132 return R && classof(R);
1133 }
1134
1135 static inline bool classof(const VPValue *V) {
1136 auto *R = V->getDefiningRecipe();
1137 return R && classof(R);
1138 }
1139
1141
1142 static inline bool classof(const VPSingleDefRecipe *R) {
1143 return classof(static_cast<const VPRecipeBase *>(R));
1144 }
1145
1146 void execute(VPTransformState &State) override = 0;
1147
1148 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1150 VPCostContext &Ctx) const;
1151};
1152
/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling. \p PartOpIdx is the fixed operand index at which the
/// unroll-part operand is expected.
template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(const VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(const VPUser &U) const;
};
1164
1165/// Helper to manage IR metadata for recipes. It filters out metadata that
1166/// cannot be propagated.
1169
1170public:
1171 VPIRMetadata() = default;
1172
 1173 /// Adds metadata that can be preserved from the original instruction
1174 /// \p I.
1176
1177 /// Copy constructor for cloning.
1179
1181
1182 /// Add all metadata to \p I.
1183 void applyMetadata(Instruction &I) const;
1184
1185 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1186 /// already exists, it will be replaced. Otherwise, it will be added.
1187 void setMetadata(unsigned Kind, MDNode *Node) {
1188 auto It =
1189 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1190 return P.first == Kind;
1191 });
1192 if (It != Metadata.end())
1193 It->second = Node;
1194 else
1195 Metadata.emplace_back(Kind, Node);
1196 }
1197
1198 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1199 /// nodes that are common to both.
1200 void intersect(const VPIRMetadata &MD);
1201
1202 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1203 MDNode *getMetadata(unsigned Kind) const {
1204 auto It =
1205 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1206 return It != Metadata.end() ? It->second : nullptr;
1207 }
1208
1209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1210 /// Print metadata with node IDs.
1211 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1212#endif
1213};
1214
1215/// This is a concrete Recipe that models a single VPlan-level instruction.
1216/// While as any Recipe it may generate a sequence of IR instructions when
1217/// executed, these instructions would always form a single-def expression as
1218/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1219/// opcodes can take an optional mask. Masks may be assigned during
1220/// predication.
1222 public VPIRMetadata {
1223public:
 1224 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1225 enum {
1227 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1228 // values of a first-order recurrence.
1230 // Creates a mask where each lane is active (true) whilst the current
1231 // counter (first operand + index) is less than the second operand. i.e.
 1232 // mask[i] = icmp ult (op0 + i), op1
1233 // The size of the mask returned is VF * Multiplier (UF, third op).
1237 // Increment the canonical IV separately for each unrolled part.
1239 // Abstract instruction that compares two values and branches. This is
1240 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1243 // Branch with 2 boolean condition operands and 3 successors. If condition
1244 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1245 // successor 1; otherwise branches to successor 2. Expanded after region
1246 // dissolution into: (1) an OR of the two conditions branching to
1247 // middle.split or successor 2, and (2) middle.split branching to successor
1248 // 0 or successor 1 based on condition 0.
1251 /// Given operands of (the same) struct type, creates a struct of fixed-
1252 /// width vectors each containing a struct field of all operands. The
1253 /// number of operands matches the element count of every vector.
1255 /// Creates a fixed-width vector containing all operands. The number of
1256 /// operands matches the vector element count.
1258 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1259 /// abstract VPInstruction whose single defined VPValue represents VF
1260 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1261 /// VPInstructions.
1263 /// Reduce the operands to the final reduction result using the operation
1264 /// specified via the operation's VPIRFlags.
1266 // Extracts the last part of its operand. Removed during unrolling.
1268 // Extracts the last lane of its vector operand, per part.
1270 // Extracts the second-to-last lane from its operand or the second-to-last
1271 // part if it is scalar. In the latter case, the recipe will be removed
1272 // during unrolling.
1274 LogicalAnd, // Non-poison propagating logical And.
1275 LogicalOr, // Non-poison propagating logical Or.
1276 // Add an offset in bytes (second operand) to a base pointer (first
1277 // operand). Only generates scalar values (either for the first lane only or
1278 // for all lanes, depending on its uses).
1280 // Add a vector offset in bytes (second operand) to a scalar base pointer
1281 // (first operand).
1283 // Returns a scalar boolean value, which is true if any lane of its
1284 // (boolean) vector operands is true. It produces the reduced value across
1285 // all unrolled iterations. Unrolling will add all copies of its original
1286 // operand as additional operands. AnyOf is poison-safe as all operands
1287 // will be frozen.
1289 // Calculates the first active lane index of the vector predicate operands.
1290 // It produces the lane index across all unrolled iterations. Unrolling will
1291 // add all copies of its original operand as additional operands.
1292 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1293 // result even with operands that are all zeroes.
1295 // Calculates the last active lane index of the vector predicate operands.
1296 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1297 // tail-folding to extract the correct live-out value from the last active
1298 // iteration. It produces the lane index across all unrolled iterations.
1299 // Unrolling will add all copies of its original operand as additional
1300 // operands.
1302 // Returns a reversed vector for the operand.
1304
1305 // The opcodes below are used for VPInstructionWithType.
1306 //
1307 /// Scale the first operand (vector step) by the second operand
1308 /// (scalar-step). Casts both operands to the result type if needed.
1310 /// Start vector for reductions with 3 operands: the original start value,
1311 /// the identity value for the reduction and an integer indicating the
1312 /// scaling factor.
1314 // Creates a step vector starting from 0 to VF with a step of 1.
1316 /// Extracts a single lane (first operand) from a set of vector operands.
1317 /// The lane specifies an index into a vector formed by combining all vector
1318 /// operands (all operands after the first one).
1320 /// Explicit user for the resume phi of the canonical induction in the main
1321 /// VPlan, used by the epilogue vector loop.
1323 /// Extracts the last active lane from a set of vectors. The first operand
1324 /// is the default value if no lanes in the masks are active. Conceptually,
1325 /// this concatenates all data vectors (odd operands), concatenates all
1326 /// masks (even operands -- ignoring the default value), and returns the
1327 /// last active value from the combined data vector using the combined mask.
1329
1330 /// Returns the value for vscale.
1332 /// Compute the exiting value of a wide induction after vectorization, that
1333 /// is the value of the last lane of the induction increment (i.e. its
1334 /// backedge value). Has the wide induction recipe as operand.
1338 };
1339
1340 /// Returns true if this VPInstruction generates scalar values for all lanes.
1341 /// Most VPInstructions generate a single value per part, either vector or
1342 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1343 /// values per all lanes, stemming from an original ingredient. This method
1344 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1345 /// underlying ingredient.
1346 bool doesGeneratePerAllLanes() const;
1347
1348 /// Return the number of operands determined by the opcode of the
1349 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1350 /// cannot be determined directly by the opcode.
1351 unsigned getNumOperandsForOpcode() const;
1352
1353private:
1354 typedef unsigned char OpcodeTy;
1355 OpcodeTy Opcode;
1356
1357 /// An optional name that can be used for the generated IR instruction.
1358 std::string Name;
1359
1360 /// Returns true if we can generate a scalar for the first lane only if
1361 /// needed.
1362 bool canGenerateScalarForFirstLane() const;
1363
1364 /// Utility methods serving execute(): generates a single vector instance of
1365 /// the modeled instruction. \returns the generated value. . In some cases an
1366 /// existing value is returned rather than a generated one.
1367 Value *generate(VPTransformState &State);
1368
1369 /// Returns true if the VPInstruction does not need masking.
1370 bool alwaysUnmasked() const {
1371 if (Opcode == VPInstruction::MaskedCond)
1372 return false;
1373
1374 // For now only VPInstructions with underlying values use masks.
1375 // TODO: provide masks to VPInstructions w/o underlying values.
1376 if (!getUnderlyingValue())
1377 return true;
1378
1379 return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
1380 }
1381
1382public:
1383 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1384 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1385 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1386
1387 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1388
1389 VPInstruction *clone() override {
1390 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1391 getDebugLoc(), Name);
1392 if (getUnderlyingValue())
1393 New->setUnderlyingValue(getUnderlyingInstr());
1394 return New;
1395 }
1396
1397 unsigned getOpcode() const { return Opcode; }
1398
1399 /// Generate the instruction.
1400 /// TODO: We currently execute only per-part unless a specific instance is
1401 /// provided.
1402 void execute(VPTransformState &State) override;
1403
1404 /// Return the cost of this VPInstruction.
1405 InstructionCost computeCost(ElementCount VF,
1406 VPCostContext &Ctx) const override;
1407
1408#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1409 /// Print the VPInstruction to dbgs() (for debugging).
1410 LLVM_DUMP_METHOD void dump() const;
1411#endif
1412
1413 bool hasResult() const {
1414 // CallInst may or may not have a result, depending on the called function.
1415 // Conservatively return calls have results for now.
1416 switch (getOpcode()) {
1417 case Instruction::Ret:
1418 case Instruction::UncondBr:
1419 case Instruction::CondBr:
1420 case Instruction::Store:
1421 case Instruction::Switch:
1422 case Instruction::IndirectBr:
1423 case Instruction::Resume:
1424 case Instruction::CatchRet:
1425 case Instruction::Unreachable:
1426 case Instruction::Fence:
1427 case Instruction::AtomicRMW:
1431 return false;
1432 default:
1433 return true;
1434 }
1435 }
1436
1437 /// Returns true if the VPInstruction has a mask operand.
1438 bool isMasked() const {
1439 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1440 // VPInstructions without a fixed number of operands cannot be masked.
1441 if (NumOpsForOpcode == -1u)
1442 return false;
1443 return NumOpsForOpcode + 1 == getNumOperands();
1444 }
1445
1446 /// Returns the number of operands, excluding the mask if the VPInstruction is
1447 /// masked.
1448 unsigned getNumOperandsWithoutMask() const {
1449 return getNumOperands() - isMasked();
1450 }
1451
1452 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1453 void addMask(VPValue *Mask) {
1454 assert(!isMasked() && "recipe is already masked");
1455 if (alwaysUnmasked())
1456 return;
1457 addOperand(Mask);
1458 }
1459
1460 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1461 /// VPInstructions.
1462 VPValue *getMask() const {
1463 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1464 }
1465
1466 /// Returns an iterator range over the operands excluding the mask operand
1467 /// if present.
1474
1475 /// Returns true if the underlying opcode may read from or write to memory.
1476 bool opcodeMayReadOrWriteFromMemory() const;
1477
1478 /// Returns true if the recipe only uses the first lane of operand \p Op.
1479 bool usesFirstLaneOnly(const VPValue *Op) const override;
1480
1481 /// Returns true if the recipe only uses the first part of operand \p Op.
1482 bool usesFirstPartOnly(const VPValue *Op) const override;
1483
1484 /// Returns true if this VPInstruction produces a scalar value from a vector,
1485 /// e.g. by performing a reduction or extracting a lane.
1486 bool isVectorToScalar() const;
1487
1488 /// Returns true if this VPInstruction's operands are single scalars and the
1489 /// result is also a single scalar.
1490 bool isSingleScalar() const;
1491
1492 /// Returns the symbolic name assigned to the VPInstruction.
1493 StringRef getName() const { return Name; }
1494
1495 /// Set the symbolic name for the VPInstruction.
1496 void setName(StringRef NewName) { Name = NewName.str(); }
1497
1498protected:
1499#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1500 /// Print the VPInstruction to \p O.
1501 void printRecipe(raw_ostream &O, const Twine &Indent,
1502 VPSlotTracker &SlotTracker) const override;
1503#endif
1504};
1505
1506/// A specialization of VPInstruction augmenting it with a dedicated result
1507/// type, to be used when the opcode and operands of the VPInstruction don't
1508/// directly determine the result type. Note that there is no separate recipe ID
1509/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1510/// distinguished purely by the opcode.
1512 /// Scalar result type produced by the recipe.
1513 Type *ResultTy;
1514
1515public:
1517 Type *ResultTy, const VPIRFlags &Flags = {},
1518 const VPIRMetadata &Metadata = {},
1520 const Twine &Name = "")
1521 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1522 ResultTy(ResultTy) {}
1523
1524 static inline bool classof(const VPRecipeBase *R) {
1525 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1526 // type information.
1527 if (R->isScalarCast())
1528 return true;
1529 auto *VPI = dyn_cast<VPInstruction>(R);
1530 if (!VPI)
1531 return false;
1532 switch (VPI->getOpcode()) {
1536 case Instruction::Load:
1537 return true;
1538 default:
1539 return false;
1540 }
1541 }
1542
1543 static inline bool classof(const VPUser *R) {
1545 }
1546
1547 VPInstruction *clone() override {
1548 auto *New =
1550 *this, *this, getDebugLoc(), getName());
1551 New->setUnderlyingValue(getUnderlyingValue());
1552 return New;
1553 }
1554
1555 void execute(VPTransformState &State) override;
1556
1557 /// Return the cost of this VPInstruction.
1559 VPCostContext &Ctx) const override {
1560 // TODO: Compute accurate cost after retiring the legacy cost model.
1561 return 0;
1562 }
1563
1564 Type *getResultType() const { return ResultTy; }
1565
1566protected:
1567#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1568 /// Print the recipe.
1569 void printRecipe(raw_ostream &O, const Twine &Indent,
1570 VPSlotTracker &SlotTracker) const override;
1571#endif
1572};
1573
1574/// Helper type to provide functions to access incoming values and blocks for
1575/// phi-like recipes.
1577protected:
1578 /// Return a VPRecipeBase* to the current object.
1579 virtual const VPRecipeBase *getAsRecipe() const = 0;
1580
1581public:
1582 virtual ~VPPhiAccessors() = default;
1583
1584 /// Returns the incoming VPValue with index \p Idx.
1585 VPValue *getIncomingValue(unsigned Idx) const {
1586 return getAsRecipe()->getOperand(Idx);
1587 }
1588
1589 /// Returns the incoming block with index \p Idx.
1590 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1591
1592 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1593 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1594
1595 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1596 /// block.
1597 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1598
1599 /// Returns the number of incoming values, also number of incoming blocks.
1600 virtual unsigned getNumIncoming() const {
1601 return getAsRecipe()->getNumOperands();
1602 }
1603
 1604 /// Returns an iterator range over the incoming values.
1606 return make_range(getAsRecipe()->op_begin(),
1607 getAsRecipe()->op_begin() + getNumIncoming());
1608 }
1609
1611 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1612
1613 /// Returns an iterator range over the incoming blocks.
1615 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1616 return getIncomingBlock(Idx);
1617 };
1618 return map_range(index_range(0, getNumIncoming()), GetBlock);
1619 }
1620
1621 /// Returns an iterator range over pairs of incoming values and corresponding
1622 /// incoming blocks.
1628
1629 /// Removes the incoming value for \p IncomingBlock, which must be a
1630 /// predecessor.
1631 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1632
1633#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1634 /// Print the recipe.
1636#endif
1637};
1638
1641 const Twine &Name = "")
1642 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1643
1644 static inline bool classof(const VPUser *U) {
1645 auto *VPI = dyn_cast<VPInstruction>(U);
1646 return VPI && VPI->getOpcode() == Instruction::PHI;
1647 }
1648
1649 static inline bool classof(const VPValue *V) {
1650 auto *VPI = dyn_cast<VPInstruction>(V);
1651 return VPI && VPI->getOpcode() == Instruction::PHI;
1652 }
1653
1654 static inline bool classof(const VPSingleDefRecipe *SDR) {
1655 auto *VPI = dyn_cast<VPInstruction>(SDR);
1656 return VPI && VPI->getOpcode() == Instruction::PHI;
1657 }
1658
1659 VPPhi *clone() override {
1660 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1661 PhiR->setUnderlyingValue(getUnderlyingValue());
1662 return PhiR;
1663 }
1664
1665 void execute(VPTransformState &State) override;
1666
1667protected:
1668#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1669 /// Print the recipe.
1670 void printRecipe(raw_ostream &O, const Twine &Indent,
1671 VPSlotTracker &SlotTracker) const override;
1672#endif
1673
1674 const VPRecipeBase *getAsRecipe() const override { return this; }
1675};
1676
 1677 /// A recipe to wrap an original IR instruction not to be modified during
 1678 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
 1679 /// Except for PHIs, VPIRInstructions cannot have any operands.
1681 Instruction &I;
1682
1683protected:
1684 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1685 /// subclasses may need to be created, e.g. VPIRPhi.
1687 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1688
1689public:
1690 ~VPIRInstruction() override = default;
1691
1692 /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
1693 /// VPIRInstruction.
1695
1696 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1697
1699 auto *R = create(I);
1700 for (auto *Op : operands())
1701 R->addOperand(Op);
1702 return R;
1703 }
1704
1705 void execute(VPTransformState &State) override;
1706
1707 /// Return the cost of this VPIRInstruction.
1709 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1710
1711 Instruction &getInstruction() const { return I; }
1712
1713 bool usesScalars(const VPValue *Op) const override {
1715 "Op must be an operand of the recipe");
1716 return true;
1717 }
1718
1719 bool usesFirstPartOnly(const VPValue *Op) const override {
1721 "Op must be an operand of the recipe");
1722 return true;
1723 }
1724
1725 bool usesFirstLaneOnly(const VPValue *Op) const override {
1727 "Op must be an operand of the recipe");
1728 return true;
1729 }
1730
1731protected:
1732#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1733 /// Print the recipe.
1734 void printRecipe(raw_ostream &O, const Twine &Indent,
1735 VPSlotTracker &SlotTracker) const override;
1736#endif
1737};
1738
1739/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1740/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1741/// allowed, and it is used to add a new incoming value for the single
1742/// predecessor VPBB.
1744 public VPPhiAccessors {
1746
1747 static inline bool classof(const VPRecipeBase *U) {
1748 auto *R = dyn_cast<VPIRInstruction>(U);
1749 return R && isa<PHINode>(R->getInstruction());
1750 }
1751
1752 static inline bool classof(const VPUser *U) {
1753 auto *R = dyn_cast<VPRecipeBase>(U);
1754 return R && classof(R);
1755 }
1756
1758
1759 void execute(VPTransformState &State) override;
1760
1761protected:
1762#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1763 /// Print the recipe.
1764 void printRecipe(raw_ostream &O, const Twine &Indent,
1765 VPSlotTracker &SlotTracker) const override;
1766#endif
1767
1768 const VPRecipeBase *getAsRecipe() const override { return this; }
1769};
1770
1771/// VPWidenRecipe is a recipe for producing a widened instruction using the
1772/// opcode and operands of the recipe. This recipe covers most of the
1773/// traditional vectorization cases where each recipe transforms into a
1774/// vectorized version of itself.
1776 public VPIRMetadata {
1777 unsigned Opcode;
1778
1779public:
1781 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1782 DebugLoc DL = {})
1783 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1784 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1785 setUnderlyingValue(&I);
1786 }
1787
  /// Construct a widen recipe from an explicit \p Opcode and \p Operands,
  /// without an underlying IR instruction.
  VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
                DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
        VPIRMetadata(Metadata), Opcode(Opcode) {}
1793
1794 ~VPWidenRecipe() override = default;
1795
1796 VPWidenRecipe *clone() override {
1797 if (auto *UV = getUnderlyingValue())
1798 return new VPWidenRecipe(*cast<Instruction>(UV), operands(), *this, *this,
1799 getDebugLoc());
1800 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1801 }
1802
1803 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1804
1805 /// Produce a widened instruction using the opcode and operands of the recipe,
1806 /// processing State.VF elements.
1807 void execute(VPTransformState &State) override;
1808
1809 /// Return the cost of this VPWidenRecipe.
1810 InstructionCost computeCost(ElementCount VF,
1811 VPCostContext &Ctx) const override;
1812
1813 unsigned getOpcode() const { return Opcode; }
1814
1815protected:
1816#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1817 /// Print the recipe.
1818 void printRecipe(raw_ostream &O, const Twine &Indent,
1819 VPSlotTracker &SlotTracker) const override;
1820#endif
1821
1822 /// Returns true if the recipe only uses the first lane of operand \p Op.
1823 bool usesFirstLaneOnly(const VPValue *Op) const override {
1825 "Op must be an operand of the recipe");
1826 return Opcode == Instruction::Select && Op == getOperand(0) &&
1827 Op->isDefinedOutsideLoopRegions();
1828 }
1829};
1830
1831/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1833 /// Cast instruction opcode.
1834 Instruction::CastOps Opcode;
1835
1836 /// Result type for the cast.
1837 Type *ResultTy;
1838
1839public:
1841 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1842 const VPIRMetadata &Metadata = {},
1844 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1845 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1846 assert(flagsValidForOpcode(Opcode) &&
1847 "Set flags not supported for the provided opcode");
1849 "Opcode requires specific flags to be set");
1851 }
1852
1853 ~VPWidenCastRecipe() override = default;
1854
1856 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1858 *this, *this, getDebugLoc());
1859 }
1860
1861 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1862
1863 /// Produce widened copies of the cast.
1864 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1865
1866 /// Return the cost of this VPWidenCastRecipe.
1868 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1869
1870 Instruction::CastOps getOpcode() const { return Opcode; }
1871
1872 /// Returns the result type of the cast.
1873 Type *getResultType() const { return ResultTy; }
1874
1875protected:
1876#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1877 /// Print the recipe.
1878 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1879 VPSlotTracker &SlotTracker) const override;
1880#endif
1881};
1882
1883/// A recipe for widening vector intrinsics.
1885 /// ID of the vector intrinsic to widen.
1886 Intrinsic::ID VectorIntrinsicID;
1887
1888 /// Scalar return type of the intrinsic.
1889 Type *ResultTy;
1890
1891 /// True if the intrinsic may read from memory.
1892 bool MayReadFromMemory;
1893
 1894 /// True if the intrinsic may write to memory.
1895 bool MayWriteToMemory;
1896
1897 /// True if the intrinsic may have side-effects.
1898 bool MayHaveSideEffects;
1899
1900public:
1902 ArrayRef<VPValue *> CallArguments, Type *Ty,
1903 const VPIRFlags &Flags = {},
1904 const VPIRMetadata &MD = {},
1906 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1907 Flags, DL),
1908 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1909 MayReadFromMemory(CI.mayReadFromMemory()),
1910 MayWriteToMemory(CI.mayWriteToMemory()),
1911 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1912 setUnderlyingValue(&CI);
1913 }
1914
1916 ArrayRef<VPValue *> CallArguments, Type *Ty,
1917 const VPIRFlags &Flags = {},
1918 const VPIRMetadata &Metadata = {},
1920 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1921 Flags, DL),
1922 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1923 ResultTy(Ty) {
1924 LLVMContext &Ctx = Ty->getContext();
1925 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1926 MemoryEffects ME = Attrs.getMemoryEffects();
1927 MayReadFromMemory = !ME.onlyWritesMemory();
1928 MayWriteToMemory = !ME.onlyReadsMemory();
1929 MayHaveSideEffects = MayWriteToMemory ||
1930 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1931 !Attrs.hasAttribute(Attribute::WillReturn);
1932 }
1933
1934 ~VPWidenIntrinsicRecipe() override = default;
1935
1937 if (Value *CI = getUnderlyingValue())
1938 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1939 operands(), ResultTy, *this, *this,
1940 getDebugLoc());
1941 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1942 *this, *this, getDebugLoc());
1943 }
1944
1945 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1946
1947 /// Produce a widened version of the vector intrinsic.
1948 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1949
1950 /// Return the cost of this vector intrinsic.
1952 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1953
1954 /// Return the ID of the intrinsic.
1955 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1956
1957 /// Return the scalar return type of the intrinsic.
1958 Type *getResultType() const { return ResultTy; }
1959
1960 /// Return to name of the intrinsic as string.
1962
1963 /// Returns true if the intrinsic may read from memory.
1964 bool mayReadFromMemory() const { return MayReadFromMemory; }
1965
1966 /// Returns true if the intrinsic may write to memory.
1967 bool mayWriteToMemory() const { return MayWriteToMemory; }
1968
1969 /// Returns true if the intrinsic may have side-effects.
1970 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1971
1972 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1973
1974protected:
1975#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1976 /// Print the recipe.
1977 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1978 VPSlotTracker &SlotTracker) const override;
1979#endif
1980};
1981
1982/// A recipe for widening Call instructions using library calls.
1984                          public VPIRMetadata {
1985  /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1986  /// between a given VF and the chosen vectorized variant, so there will be a
1987  /// different VPlan for each VF with a valid variant.
1988  Function *Variant;
1989
1990public:
  /// Create a recipe for underlying value \p UV calling the vectorized
  /// \p Variant; the last operand must be the called function.
1992                    ArrayRef<VPValue *> CallArguments,
1993                    const VPIRFlags &Flags = {},
1994                    const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1995      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
1996                            DL),
1997        VPIRMetadata(Metadata), Variant(Variant) {
1998    setUnderlyingValue(UV);
1999    assert(
2000        isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2001        "last operand must be the called function");
2002  }
2003
2004  ~VPWidenCallRecipe() override = default;
2005
  /// Clone the recipe, preserving the underlying value, chosen variant,
  /// operands, flags, metadata and debug location.
2007    return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2008                                 *this, *this, getDebugLoc());
2009  }
2010
2011  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2012
2013  /// Produce a widened version of the call instruction.
2014  void execute(VPTransformState &State) override;
2015
2016  /// Return the cost of this VPWidenCallRecipe.
2017  InstructionCost computeCost(ElementCount VF,
2018                              VPCostContext &Ctx) const override;
2019
2023
2026
2027protected:
2028#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2029  /// Print the recipe.
2030  void printRecipe(raw_ostream &O, const Twine &Indent,
2031                   VPSlotTracker &SlotTracker) const override;
2032#endif
2033};
2034
2035/// A recipe representing a sequence of load -> update -> store as part of
2036/// a histogram operation. This means there may be aliasing between vector
2037/// lanes, which is handled by the llvm.experimental.vector.histogram family
2038/// of intrinsics. The only update operations currently supported are
2039/// 'add' and 'sub' where the other term is loop-invariant.
2041  /// Opcode of the update operation, currently either add or sub.
2042  unsigned Opcode;
2043
2044public:
  /// Create a histogram recipe updating with \p Opcode. The optional mask, if
  /// present, is operand 2 (see getMask); the layout of the first two operands
  /// is presumably address and increment — TODO confirm against callers.
2045  VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2047      : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2048        Opcode(Opcode) {}
2049
2050  ~VPHistogramRecipe() override = default;
2051
  /// Clone the recipe with the same opcode, operands and debug location.
2053    return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2054  }
2055
2056  VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2057
2058  /// Produce a vectorized histogram operation.
2059  void execute(VPTransformState &State) override;
2060
2061  /// Return the cost of this VPHistogramRecipe.
2063                              VPCostContext &Ctx) const override;
2064
  /// Return the opcode of the update operation (add or sub).
2065  unsigned getOpcode() const { return Opcode; }
2066
2067  /// Return the mask operand if one was provided, or a null pointer if all
2068  /// lanes should be executed unconditionally.
2069  VPValue *getMask() const {
2070    return getNumOperands() == 3 ? getOperand(2) : nullptr;
2071  }
2072
2073protected:
2074#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2075  /// Print the recipe.
2076  void printRecipe(raw_ostream &O, const Twine &Indent,
2077                   VPSlotTracker &SlotTracker) const override;
2078#endif
2079};
2080
2081/// A recipe for handling GEP instructions.
2083  Type *SourceElementTy;
2084
  /// Returns true if the GEP's pointer operand (operand 0) is defined outside
  /// of the plan's loop regions.
2085  bool isPointerLoopInvariant() const {
2086    return getOperand(0)->isDefinedOutsideLoopRegions();
2087  }
2088
  /// Returns true if the \p I-th index operand is defined outside of the
  /// plan's loop regions.
2089  bool isIndexLoopInvariant(unsigned I) const {
2090    return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2091  }
2092
2093public:
  /// Create a recipe widening \p GEP; the source element type is taken from
  /// the underlying instruction, which must not carry metadata.
2095                    const VPIRFlags &Flags = {},
2097      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2098        SourceElementTy(GEP->getSourceElementType()) {
2099    setUnderlyingValue(GEP);
2101    (void)Metadata;
2103    assert(Metadata.empty() && "unexpected metadata on GEP");
2104  }
2105
2106  ~VPWidenGEPRecipe() override = default;
2107
2110                                 operands(), *this, getDebugLoc());
2111  }
2112
2113  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2114
2115  /// This recipe generates a GEP instruction.
2116  unsigned getOpcode() const { return Instruction::GetElementPtr; }
2117
2118  /// Generate the gep nodes.
2119  void execute(VPTransformState &State) override;
2120
  /// Return the source element type of the generated GEP.
2121  Type *getSourceElementType() const { return SourceElementTy; }
2122
2123  /// Return the cost of this VPWidenGEPRecipe.
2125                              VPCostContext &Ctx) const override {
2126    // TODO: Compute accurate cost after retiring the legacy cost model.
2127    return 0;
2128  }
2129
2130  /// Returns true if the recipe only uses the first lane of operand \p Op.
2131  bool usesFirstLaneOnly(const VPValue *Op) const override;
2132
2133protected:
2134#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2135  /// Print the recipe.
2136  void printRecipe(raw_ostream &O, const Twine &Indent,
2137                   VPSlotTracker &SlotTracker) const override;
2138#endif
2139};
2140
2141/// A recipe to compute a pointer to the last element of each part of a widened
2142/// memory access for widened memory accesses of SourceElementTy. Used for
2143/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2144/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2145/// unroller otherwise.
2147  Type *SourceElementTy;
2148
2149  /// The constant stride of the pointer computed by this recipe, expressed in
2150  /// units of SourceElementTy.
2151  int64_t Stride;
2152
2153public:
  /// Create a recipe for pointer \p Ptr and runtime VF \p VF with negative
  /// element stride \p Stride.
2154  VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2155                           int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2156      : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2157                            GEPFlags, DL),
2158        SourceElementTy(SourceElementTy), Stride(Stride) {
2159    assert(Stride < 0 && "Stride must be negative");
2160  }
2161
2162  VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2163
2164  Type *getSourceElementType() const { return SourceElementTy; }
2165  int64_t getStride() const { return Stride; }
2166  VPValue *getPointer() const { return getOperand(0); }
2167  VPValue *getVFValue() const { return getOperand(1); }
  /// Returns the offset operand if it has been materialized (operand 2), or
  /// nullptr otherwise.
2169    return getNumOperands() == 3 ? getOperand(2) : nullptr;
2170  }
2171
2172  /// Adds the offset operand to the recipe.
2173  /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2174  void materializeOffset(unsigned Part = 0);
2175
2176  void execute(VPTransformState &State) override;
2177
  /// Returns true if the recipe only uses the first lane of operand \p Op.
2178  bool usesFirstLaneOnly(const VPValue *Op) const override {
2180           "Op must be an operand of the recipe");
2181    return true;
2182  }
2183
2184  /// Return the cost of this VPVectorEndPointerRecipe.
2186                              VPCostContext &Ctx) const override {
2187    // TODO: Compute accurate cost after retiring the legacy cost model.
2188    return 0;
2189  }
2190
2191  /// Returns true if the recipe only uses the first part of operand \p Op.
2192  bool usesFirstPartOnly(const VPValue *Op) const override {
2194           "Op must be an operand of the recipe");
2195    assert(getNumOperands() <= 2 && "must have at most two operands");
2196    return true;
2197  }
2198
  /// Clone the recipe, re-adding the materialized offset operand if present.
2200    auto *VEPR = new VPVectorEndPointerRecipe(
2203    if (auto *Offset = getOffset())
2204      VEPR->addOperand(Offset);
2205    return VEPR;
2206  }
2207
2208protected:
2209#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2210  /// Print the recipe.
2211  void printRecipe(raw_ostream &O, const Twine &Indent,
2212                   VPSlotTracker &SlotTracker) const override;
2213#endif
2214};
2215
2216/// A recipe to compute the pointers for widened memory accesses of \p
2217/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2218/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2220  Type *SourceElementTy;
2221
2222public:
  /// Create a recipe computing pointers into \p Ptr for accesses of
  /// \p SourceElementTy.
2223  VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2224                        GEPNoWrapFlags GEPFlags, DebugLoc DL)
2225      : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2226        SourceElementTy(SourceElementTy) {}
2227
2228  VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2229
  /// Returns the offset operand added by unrolling (operand 1), or nullptr if
  /// it has not been added.
2231    return getNumOperands() == 2 ? getOperand(1) : nullptr;
2232  }
2233
2234  void execute(VPTransformState &State) override;
2235
  /// Return the source element type of the widened access.
2236  Type *getSourceElementType() const { return SourceElementTy; }
2237
  /// Returns true if the recipe only uses the first lane of operand \p Op.
2238  bool usesFirstLaneOnly(const VPValue *Op) const override {
2240           "Op must be an operand of the recipe");
2241    return true;
2242  }
2243
2244  /// Returns true if the recipe only uses the first part of operand \p Op.
2245  bool usesFirstPartOnly(const VPValue *Op) const override {
2247           "Op must be an operand of the recipe");
2248    assert(getNumOperands() <= 2 && "must have at most two operands");
2249    return true;
2250  }
2251
  /// Clone the recipe, re-adding the unroll offset operand if present.
2253    auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2255    if (auto *Off = getOffset())
2256      Clone->addOperand(Off);
2257    return Clone;
2258  }
2259
2260  /// Return the cost of this VPVectorPointerRecipe.
2262                              VPCostContext &Ctx) const override {
2263    // TODO: Compute accurate cost after retiring the legacy cost model.
2264    return 0;
2265  }
2266
2267protected:
2268#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2269  /// Print the recipe.
2270  void printRecipe(raw_ostream &O, const Twine &Indent,
2271                   VPSlotTracker &SlotTracker) const override;
2272#endif
2273};
2274
2275/// A pure virtual base class for all recipes modeling header phis, including
2276/// phis for first order recurrences, pointer inductions and reductions. The
2277/// start value is the first operand of the recipe and the incoming value from
2278/// the backedge is the second operand.
2279///
2280/// Inductions are modeled using the following sub-classes:
2281///  * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2282///    floating point inductions with arbitrary start and step values. Produces
2283///    a vector PHI per-part.
2284///  * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2285///    pointer induction. Produces either a vector PHI per-part or scalar values
2286///    per-lane based on the canonical induction.
2287///  * VPFirstOrderRecurrencePHIRecipe
2288///  * VPReductionPHIRecipe
2289///  * VPActiveLaneMaskPHIRecipe
2290///  * VPEVLBasedIVPHIRecipe
2291///
2292/// Note that the canonical IV is modeled as a VPRegionValue associated with
2293/// its loop region.
2295                                      public VPPhiAccessors {
2296protected:
2297  VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2298                    VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2299      : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}

  /// Expose this recipe to the VPPhiAccessors interface.
2301  const VPRecipeBase *getAsRecipe() const override { return this; }
2302
2303public:
2304  ~VPHeaderPHIRecipe() override = default;
2305
2306  /// Method to support type inquiry through isa, cast, and dyn_cast.
2307  static inline bool classof(const VPRecipeBase *R) {
2308    return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2309           R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2310  }
2311  static inline bool classof(const VPValue *V) {
2312    return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2313  }
2314  static inline bool classof(const VPSingleDefRecipe *R) {
2315    return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2316  }
2317
2318  /// Generate the phi nodes.
2319  void execute(VPTransformState &State) override = 0;
2320
2321  /// Return the cost of this header phi recipe.
2323                              VPCostContext &Ctx) const override;
2324
2325  /// Returns the start value of the phi, if one is set.
2327    return getNumOperands() == 0 ? nullptr : getOperand(0);
2328  }
  // Const overload of the accessor above.
2330    return getNumOperands() == 0 ? nullptr : getOperand(0);
2331  }
2332
2333  /// Update the start value of the recipe.
2335
2336  /// Returns the incoming value from the loop backedge.
2338    return getOperand(1);
2339  }
2340
2341  /// Update the incoming value from the loop backedge.
2343
2344  /// Returns the backedge value as a recipe. The backedge value is guaranteed
2345  /// to be a recipe.
2347    return *getBackedgeValue()->getDefiningRecipe();
2348  }
2349
2350protected:
2351#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2352  /// Print the recipe.
2353  void printRecipe(raw_ostream &O, const Twine &Indent,
2354                   VPSlotTracker &SlotTracker) const override = 0;
2355#endif
2356};
2357
2358/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2359/// VPWidenPointerInductionRecipe), providing shared functionality, including
2360/// retrieving the step value, induction descriptor and original phi node.
2362  InductionDescriptor IndDesc;
2363
2364public:
  /// Create a widened induction recipe for phi \p IV; operands are the start
  /// value \p Start followed by the step value \p Step.
2365  VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2366                         VPValue *Step, const InductionDescriptor &IndDesc,
2367                         DebugLoc DL)
2368      : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2369    addOperand(Step);
2370  }
2371
2372  static inline bool classof(const VPRecipeBase *R) {
2373    return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2374           R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2375  }
2376
2377  static inline bool classof(const VPValue *V) {
2378    auto *R = V->getDefiningRecipe();
2379    return R && classof(R);
2380  }
2381
2382  static inline bool classof(const VPSingleDefRecipe *R) {
2383    return classof(static_cast<const VPRecipeBase *>(R));
2384  }
2385
2386  void execute(VPTransformState &State) override = 0;
2387
2388  /// Returns the start value of the induction.
2390
2391  /// Returns the step value of the induction.
2393  const VPValue *getStepValue() const { return getOperand(1); }
2394
2395  /// Update the step value of the recipe.
2396  void setStepValue(VPValue *V) { setOperand(1, V); }
2397
  /// Returns the VF value of the induction (operand 2).
2399  const VPValue *getVFValue() const { return getOperand(2); }
2400
2401  /// Returns the number of incoming values, also number of incoming blocks.
2402  /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2403  /// incoming value, its start value.
2404  unsigned getNumIncoming() const override { return 1; }
2405
2406  /// Returns the underlying PHINode if one exists, or null otherwise.
2410
2411  /// Returns the induction descriptor for the recipe.
2412  const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

  // Overriding the backedge-value mutators/accessors is invalid here; the
  // widened induction generates its own backedge value.
2415    // TODO: All operands of base recipe must exist and be at same index in
2416    // derived recipe.
2418                     "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2419  }

2421    // TODO: All operands of base recipe must exist and be at same index in
2422    // derived recipe.
2424                     "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2425  }
2426
2427  /// Returns true if the recipe only uses the first lane of operand \p Op.
2428  bool usesFirstLaneOnly(const VPValue *Op) const override {
2430           "Op must be an operand of the recipe");
2431    // The recipe creates its own wide start value, so it only requests the
2432    // first lane of the operand.
2433    // TODO: Remove once creating the start value is modeled separately.
2434    return Op == getStartValue() || Op == getStepValue();
2435  }
2436};
2438
2439/// A recipe for handling phi nodes of integer and floating-point inductions,
2440/// producing their vector values. This is an abstract recipe and must be
2441/// converted to concrete recipes before executing.
2443                                         public VPIRFlags {
2444  TruncInst *Trunc;

  /// Returns true if the recipe has been unrolled, in which case it carries 2
  /// additional operands (see getSplatVFValue/getLastUnrolledPartOperand).
2447  bool isUnrolled() const { return getNumOperands() == 5; }
2448
2449public:
  /// Create a recipe for induction phi \p IV without a truncate; operands are
  /// Start, Step and VF.
2451                                VPValue *VF, const InductionDescriptor &IndDesc,
2452                                const VPIRFlags &Flags, DebugLoc DL)
2453      : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2454                               Start, Step, IndDesc, DL),
2455        VPIRFlags(Flags), Trunc(nullptr) {
2456    addOperand(VF);
2457  }
2458
  /// Create a recipe for induction phi \p IV truncated by \p Trunc, which must
  /// not carry metadata.
2460                                VPValue *VF, const InductionDescriptor &IndDesc,
2461                                TruncInst *Trunc, const VPIRFlags &Flags,
2462                                DebugLoc DL)
2463      : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2464                               Start, Step, IndDesc, DL),
2465        VPIRFlags(Flags), Trunc(Trunc) {
2466    addOperand(VF);
2468    (void)Metadata;
2469    if (Trunc)
2471      assert(Metadata.empty() && "unexpected metadata on Trunc");
2472  }

2475

2481

2482  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)

2484  void execute(VPTransformState &State) override {
2485    llvm_unreachable("cannot execute this recipe, should be expanded via "
2486                     "expandVPWidenIntOrFpInductionRecipe");
2487  }
2488
2489  /// Returns the start value of the induction.
2491
2492  /// If the recipe has been unrolled, return the VPValue for the induction
2493  /// increment, otherwise return null.
2495    return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2496  }
2497
2498  /// Returns the number of incoming values, also number of incoming blocks.
2499  /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2500  /// incoming value, its start value.
2501  unsigned getNumIncoming() const override { return 1; }
2502
2503  /// Returns the first defined value as TruncInst, if it is one or nullptr
2504  /// otherwise.
2505  TruncInst *getTruncInst() { return Trunc; }
2506  const TruncInst *getTruncInst() const { return Trunc; }
2507
2508  /// Returns true if the induction is canonical, i.e. starting at 0 and
2509  /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2510  /// same type as the canonical induction.
2511  bool isCanonical() const;
2512
2513  /// Returns the scalar type of the induction.
2515    return Trunc ? Trunc->getType() : getStartValue()->getType();
2516  }
2517
2518  /// Returns the VPValue representing the value of this induction at
2519  /// the last unrolled part, if it exists. Returns itself if unrolling did not
2520  /// take place.
2522    return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2523  }
2524
2525protected:
2526#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2527  /// Print the recipe.
2528  void printRecipe(raw_ostream &O, const Twine &Indent,
2529                   VPSlotTracker &SlotTracker) const override;
2530#endif
2531};
2532
2534public:
2535  /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2536  /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2537  /// VF*UF.
  /// Operands are: Start (0), Step (1, via the base class) and
  /// NumUnrolledElems (2).
2539                                 VPValue *NumUnrolledElems,
2540                                 const InductionDescriptor &IndDesc, DebugLoc DL)
2541      : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2542                               Start, Step, IndDesc, DL) {
2543    addOperand(NumUnrolledElems);
2544  }

2547

2553

2554  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)

2556  /// Generate vector values for the pointer induction.
2557  void execute(VPTransformState &State) override {
2558    llvm_unreachable("cannot execute this recipe, should be expanded via "
2559                     "expandVPWidenPointerInduction");
2560  };
2561
2562  /// Returns true if only scalar values will be generated.
2563  bool onlyScalarsGenerated(bool IsScalable);
2564
2565protected:
2566#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2567  /// Print the recipe.
2568  void printRecipe(raw_ostream &O, const Twine &Indent,
2569                   VPSlotTracker &SlotTracker) const override;
2570#endif
2571};
2572
2573/// A recipe for widened phis. Incoming values are operands of the recipe and
2574/// their operand index corresponds to the incoming predecessor block. If the
2575/// recipe is placed in an entry block to a (non-replicate) region, it must have
2576/// exactly 2 incoming values, the first from the predecessor of the region and
2577/// the second from the exiting block of the region.
2579                                public VPPhiAccessors {
2580  /// Name to use for the generated IR instruction for the widened phi.
2581  std::string Name;
2582
2583public:
2584  /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2585  /// debug location \p DL and \p Name.
2587                   DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2588      : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues, DL),
2589        Name(Name.str()) {}

  /// Clone the recipe with the same incoming values, debug location and name.
2592    return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2593  }
2594
2595  ~VPWidenPHIRecipe() override = default;
2596
2597  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2598
2599  /// Generate the phi/select nodes.
2600  void execute(VPTransformState &State) override;
2601
2602  /// Return the cost of this VPWidenPHIRecipe.
2604                              VPCostContext &Ctx) const override;
2605
2606protected:
2607#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2608  /// Print the recipe.
2609  void printRecipe(raw_ostream &O, const Twine &Indent,
2610                   VPSlotTracker &SlotTracker) const override;
2611#endif

  /// Expose this recipe to the VPPhiAccessors interface.
2613  const VPRecipeBase *getAsRecipe() const override { return this; }
2614};
2615
2616/// A recipe for handling first-order recurrence phis. The start value is the
2617/// first operand of the recipe and the incoming value from the backedge is the
2618/// second operand.
  /// Create a first-order recurrence phi for \p Phi with start value \p Start
  /// and backedge value \p BackedgeValue.
2621                                  VPValue &BackedgeValue)
2622      : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2623                          &Start) {
2624    addOperand(&BackedgeValue);
2625  }
2626
2627  VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)

2633

2634  void execute(VPTransformState &State) override;
2635
2636  /// Return the cost of this first-order recurrence phi recipe.
2638                              VPCostContext &Ctx) const override;
2639
2640  /// Returns true if the recipe only uses the first lane of operand \p Op.
2641  bool usesFirstLaneOnly(const VPValue *Op) const override {
2643           "Op must be an operand of the recipe");
2644    return Op == getStartValue();
2645  }
2646
2647protected:
2648#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2649  /// Print the recipe.
2650  void printRecipe(raw_ostream &O, const Twine &Indent,
2651                   VPSlotTracker &SlotTracker) const override;
2652#endif
2653};
2654
/// Possible variants of a reduction.

/// This reduction is ordered and in-loop.
struct RdxOrdered {};
/// This reduction is in-loop.
struct RdxInLoop {};
/// This reduction is unordered with the partial result scaled down by some
/// factor.
struct RdxUnordered {
  /// Factor by which the VF of the partial result is scaled down.
  unsigned VFScaleFactor;
};
using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;

/// Return the ReductionStyle matching the given properties. \p Ordered implies
/// \p InLoop; \p ScaleFactor is only used for unordered (partial) reductions.
inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
                                        unsigned ScaleFactor) {
  assert((!Ordered || InLoop) && "Ordered implies in-loop");
  if (Ordered)
    return RdxOrdered{};
  if (InLoop)
    return RdxInLoop{};
  return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
}
2677
2678/// A recipe for handling reduction phis. The start value is the first operand
2679/// of the recipe and the incoming value from the backedge is the second
2680/// operand.
2682  /// The recurrence kind of the reduction.
2683  const RecurKind Kind;

  /// The style (ordered / in-loop / unordered-partial) of the reduction.
2685  ReductionStyle Style;
2686
2687  /// The phi is part of a multi-use reduction (e.g., used in FindIV
2688  /// patterns for argmin/argmax).
2689  /// TODO: Also support cases where the phi itself has a single use, but its
2690  /// compare has multiple uses.
2691  bool HasUsesOutsideReductionChain;
2692
2693public:
2694  /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2696                       VPValue &BackedgeValue, ReductionStyle Style,
2697                       const VPIRFlags &Flags,
2698                       bool HasUsesOutsideReductionChain = false)
2699      : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2700        VPIRFlags(Flags), Kind(Kind), Style(Style),
2701        HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2702    addOperand(&BackedgeValue);
2703  }
2704
2705  ~VPReductionPHIRecipe() override = default;

  /// Clone the recipe with the same kind, style and flags.
2708    return new VPReductionPHIRecipe(
2710        *getOperand(0), *getBackedgeValue(), Style, *this,
2711        HasUsesOutsideReductionChain);
2712  }
2713
2714  VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2715
2716  /// Generate the phi/select nodes.
2717  void execute(VPTransformState &State) override;
2718
2719  /// Get the factor that the VF of this recipe's output should be scaled by, or
2720  /// 1 if it isn't scaled.
2721  unsigned getVFScaleFactor() const {
2722    auto *Partial = std::get_if<RdxUnordered>(&Style);
2723    return Partial ? Partial->VFScaleFactor : 1;
2724  }
2725
2726  /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2727  /// > 1.
2728  void setVFScaleFactor(unsigned ScaleFactor) {
2729    assert(ScaleFactor > 1 && "must set to scale factor > 1");
2730    Style = RdxUnordered{ScaleFactor};
2731  }
2732
2733  /// Returns the number of incoming values, also number of incoming blocks.
2734  /// A reduction phi has exactly two incoming values: its start value and the
2735  /// value coming from the loop backedge.
2736  unsigned getNumIncoming() const override { return 2; }
2737
2738  /// Returns the recurrence kind of the reduction.
2739  RecurKind getRecurrenceKind() const { return Kind; }
2740
2741  /// Returns true, if the phi is part of an ordered reduction.
2742  bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2743
2744  /// Returns true if the phi is part of an in-loop reduction.
2745  bool isInLoop() const {
2746    return std::holds_alternative<RdxInLoop>(Style) ||
2747           std::holds_alternative<RdxOrdered>(Style);
2748  }
2749
2750  /// Returns true if the reduction outputs a vector with a scaled down VF.
2751  bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2752
2753  /// Returns true, if the phi is part of a multi-use reduction.
2755    return HasUsesOutsideReductionChain;
2756  }
2757
2758  /// Returns true if the recipe only uses the first lane of operand \p Op.
2759  bool usesFirstLaneOnly(const VPValue *Op) const override {
2761           "Op must be an operand of the recipe");
2762    return isOrdered() || isInLoop();
2763  }
2764
2765protected:
2766#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2767  /// Print the recipe.
2768  void printRecipe(raw_ostream &O, const Twine &Indent,
2769                   VPSlotTracker &SlotTracker) const override;
2770#endif
2771};
2772
2773/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2774/// instructions.
2776public:
2777  /// The blend operation is a User of the incoming values and of their
2778  /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2779  /// be omitted (implied by passing an odd number of operands) in which case
2780  /// all other incoming values are merged into it.
2782                const VPIRFlags &Flags, DebugLoc DL)
2783      : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2784    assert(Operands.size() >= 2 && "Expected at least two operands!");
2785    setUnderlyingValue(Phi);
2786  }

  /// Clone the recipe with the same operands, flags and debug location.
2788  VPBlendRecipe *clone() override {
2790                             operands(), *this, getDebugLoc());
2791  }
2792
2793  VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2794
2795  /// A normalized blend is one that has an odd number of operands, whereby the
2796  /// first operand does not have an associated mask.
2797  bool isNormalized() const { return getNumOperands() % 2; }
2798
2799  /// Return the number of incoming values, taking into account when normalized
2800  /// the first incoming value will have no mask.
2801  unsigned getNumIncomingValues() const {
2802    return (getNumOperands() + isNormalized()) / 2;
2803  }
2804
2805  /// Return incoming value number \p Idx.
2806  VPValue *getIncomingValue(unsigned Idx) const {
2807    return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2808  }
2809
2810  /// Return mask number \p Idx.
2811  VPValue *getMask(unsigned Idx) const {
2812    assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2813    return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2814  }
2815
2816  /// Set mask number \p Idx to \p V.
2817  void setMask(unsigned Idx, VPValue *V) {
2818    assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2819    Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2820  }

2822  void execute(VPTransformState &State) override {
2823    llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2824  }
2825
2826  /// Return the cost of this VPBlendRecipe.
2827  InstructionCost computeCost(ElementCount VF,
2828                              VPCostContext &Ctx) const override;
2829
2830  /// Returns true if the recipe only uses the first lane of operand \p Op.
2831  bool usesFirstLaneOnly(const VPValue *Op) const override;
2832
2833protected:
2834#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2835  /// Print the recipe.
2836  void printRecipe(raw_ostream &O, const Twine &Indent,
2837                   VPSlotTracker &SlotTracker) const override;
2838#endif
2839};
2840
2841/// A common base class for interleaved memory operations.
2842/// An Interleaved memory operation is a memory access method that combines
2843/// multiple strided loads/stores into a single wide load/store with shuffles.
2844/// The first operand is the start address. The optional operands are, in order,
2845/// the stored values and the mask.
2847                         public VPIRMetadata {

2850  /// Indicates if the interleave group is in a conditional block and requires a
2851  /// mask.
2852  bool HasMask = false;
2853
2854  /// Indicates if gaps between members of the group need to be masked out or if
2855  /// unused gaps can be loaded speculatively.
2856  bool NeedsMaskForGaps = false;
2857
2858protected:
  /// Create an interleave recipe for group \p IG. For load groups, a
  /// VPRecipeValue is created per member; for store groups, the stored values
  /// are added as operands. The optional mask is added as the last operand.
2859  VPInterleaveBase(const unsigned char SC,
2861                   ArrayRef<VPValue *> Operands,
2862                   ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2863                   bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2864      : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2865        NeedsMaskForGaps(NeedsMaskForGaps) {
2866    // TODO: extend the masked interleaved-group support to reversed access.
2867    assert((!Mask || !IG->isReverse()) &&
2868           "Reversed masked interleave-group not supported.");
2869    if (StoredValues.empty()) {
2870      for (Instruction *Inst : IG->members()) {
2871        assert(!Inst->getType()->isVoidTy() && "must have result");
2872        new VPRecipeValue(this, Inst);
2873      }
2874    } else {
2875      for (auto *SV : StoredValues)
2876        addOperand(SV);
2877    }
2878    if (Mask) {
2879      HasMask = true;
2880      addOperand(Mask);
2881    }
2882  }
2883
2884public:
2885  VPInterleaveBase *clone() override = 0;

2887  static inline bool classof(const VPRecipeBase *R) {
2888    return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2889           R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2890  }
2891
2892  static inline bool classof(const VPUser *U) {
2893    auto *R = dyn_cast<VPRecipeBase>(U);
2894    return R && classof(R);
2895  }
2896
2897  /// Return the address accessed by this recipe.
2898  VPValue *getAddr() const {
2899    return getOperand(0); // Address is the 1st, mandatory operand.
2900  }
2901
2902  /// Return the mask used by this recipe. Note that a full mask is represented
2903  /// by a nullptr.
2904  VPValue *getMask() const {
2905    // Mask is optional and the last operand.
2906    return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2907  }
2908
2909  /// Return true if the access needs a mask because of the gaps.
2910  bool needsMaskForGaps() const { return NeedsMaskForGaps; }

2912

  /// Return the insert position of the underlying interleave group.
2914  Instruction *getInsertPos() const { return IG->getInsertPos(); }

2916  void execute(VPTransformState &State) override {
2917    llvm_unreachable("VPInterleaveBase should not be instantiated.");
2918  }
2919
2920  /// Return the cost of this recipe.
2921  InstructionCost computeCost(ElementCount VF,
2922                              VPCostContext &Ctx) const override;
2923
2924  /// Returns true if the recipe only uses the first lane of operand \p Op.
2925  bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2926
2927  /// Returns the number of stored operands of this interleave group. Returns 0
2928  /// for load interleave groups.
2929  virtual unsigned getNumStoreOperands() const = 0;
2930
2931  /// Return the VPValues stored by this interleave group. If it is a load
2932  /// interleave group, return an empty ArrayRef.
2934    return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2936  }
2937};
2938
2939/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2940/// or stores into one wide load/store and shuffles. The first operand of a
2941/// VPInterleave recipe is the address, followed by the stored values, followed
2942/// by an optional mask.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line and parts of clone() are missing from this span.
2944public:
2946 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2947 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2948 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2949 Mask, NeedsMaskForGaps, MD, DL) {}
2950
2951 ~VPInterleaveRecipe() override = default;
2952
2956 needsMaskForGaps(), *this, getDebugLoc());
2957 }
2958
2959 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2960
2961 /// Generate the wide load or store, and shuffles.
2962 void execute(VPTransformState &State) override;
2963
/// Returns true if the recipe only uses the first lane of operand \p Op.
2964 bool usesFirstLaneOnly(const VPValue *Op) const override {
2966 "Op must be an operand of the recipe");
// Only the address demands a single lane, and only if it is not also one of
// the stored values.
2967 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2968 }
2969
2970 unsigned getNumStoreOperands() const override {
// Non-stored operands are the address, plus the mask when present.
2971 return getNumOperands() - (getMask() ? 2 : 1);
2972 }
2973
2974protected:
2975#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2976 /// Print the recipe.
2977 void printRecipe(raw_ostream &O, const Twine &Indent,
2978 VPSlotTracker &SlotTracker) const override;
2979#endif
2980};
2981
2982/// A recipe for interleaved memory operations with vector-predication
2983/// intrinsics. The first operand is the address, the second operand is the
2984/// explicit vector length. Stored values and mask are optional operands.
// NOTE(review): incomplete extraction -- the class-declaration line and the
// constructor's first line are missing from this span.
2986public:
2988 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2989 R.getInterleaveGroup(), {R.getAddr(), &EVL},
2990 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2991 R.getDebugLoc()) {
2992 assert(!getInterleaveGroup()->isReverse() &&
2993 "Reversed interleave-group with tail folding is not supported.");
2994 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2995 "supported for scalable vector.");
2996 }
2997
2998 ~VPInterleaveEVLRecipe() override = default;
2999
3001 llvm_unreachable("cloning not implemented yet");
3002 }
3003
3004 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3005
3006 /// The VPValue of the explicit vector length.
3007 VPValue *getEVL() const { return getOperand(1); }
3008
3009 /// Generate the wide load or store, and shuffles.
3010 void execute(VPTransformState &State) override;
3011
3012 /// The recipe only uses the first lane of the address, and EVL operand.
3013 bool usesFirstLaneOnly(const VPValue *Op) const override {
3015 "Op must be an operand of the recipe");
3016 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3017 Op == getEVL();
3018 }
3019
3020 unsigned getNumStoreOperands() const override {
// Non-stored operands are the address and the EVL, plus the mask when
// present.
3021 return getNumOperands() - (getMask() ? 3 : 2);
3022 }
3023
3024protected:
3025#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3026 /// Print the recipe.
3027 void printRecipe(raw_ostream &O, const Twine &Indent,
3028 VPSlotTracker &SlotTracker) const override;
3029#endif
3030};
3031
3032/// A recipe to represent inloop, ordered or partial reduction operations. It
3033/// performs a reduction on a vector operand into a scalar (vector in the case
3034/// of a partial reduction) value, and adds the result to a chain. The Operands
3035/// are {ChainOp, VecOp, [Condition]}.
// NOTE(review): incomplete extraction -- the class-declaration line and a few
// constructor/clone declaration lines are missing from this span.
3037
3038 /// The recurrence kind for the reduction in question.
3039 RecurKind RdxKind;
3040 /// Whether the reduction is conditional.
3041 bool IsConditional = false;
/// The style of the reduction; holds one of the RdxInLoop / RdxOrdered /
/// RdxUnordered alternatives (queried via isInLoop/isOrdered/
/// getVFScaleFactor below).
3042 ReductionStyle Style;
3043
3044protected:
3045 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3047 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3048 ReductionStyle Style, DebugLoc DL)
3049 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3050 Style(Style) {
// The optional condition, when present, is appended as the last operand.
3051 if (CondOp) {
3052 IsConditional = true;
3053 addOperand(CondOp);
3054 }
3056 }
3057
3058public:
3060 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3062 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3063 {ChainOp, VecOp}, CondOp, Style, DL) {}
3064
3066 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3068 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3069 {ChainOp, VecOp}, CondOp, Style, DL) {}
3070
3071 ~VPReductionRecipe() override = default;
3072
3074 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3076 getCondOp(), Style, getDebugLoc());
3077 }
3078
3079 static inline bool classof(const VPRecipeBase *R) {
3080 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3081 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3082 }
3083
3084 static inline bool classof(const VPUser *U) {
3085 auto *R = dyn_cast<VPRecipeBase>(U);
3086 return R && classof(R);
3087 }
3088
3089 static inline bool classof(const VPValue *VPV) {
3090 const VPRecipeBase *R = VPV->getDefiningRecipe();
3091 return R && classof(R);
3092 }
3093
3094 static inline bool classof(const VPSingleDefRecipe *R) {
3095 return classof(static_cast<const VPRecipeBase *>(R));
3096 }
3097
3098 /// Generate the reduction in the loop.
3099 void execute(VPTransformState &State) override;
3100
3101 /// Return the cost of VPReductionRecipe.
3102 InstructionCost computeCost(ElementCount VF,
3103 VPCostContext &Ctx) const override;
3104
3105 /// Return the recurrence kind for the in-loop reduction.
3106 RecurKind getRecurrenceKind() const { return RdxKind; }
3107 /// Return true if the in-loop reduction is ordered.
3108 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3109 /// Return true if the in-loop reduction is conditional.
3110 bool isConditional() const { return IsConditional; };
3111 /// Returns true if the reduction outputs a vector with a scaled down VF.
3112 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3113 /// Returns true if the reduction is in-loop.
3114 bool isInLoop() const {
3115 return std::holds_alternative<RdxInLoop>(Style) ||
3116 std::holds_alternative<RdxOrdered>(Style);
3117 }
3118 /// The VPValue of the scalar Chain being accumulated.
3119 VPValue *getChainOp() const { return getOperand(0); }
3120 /// The VPValue of the vector value to be reduced.
3121 VPValue *getVecOp() const { return getOperand(1); }
3122 /// The VPValue of the condition for the block.
3124 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3125 }
3126 /// Get the factor that the VF of this recipe's output should be scaled by, or
3127 /// 1 if it isn't scaled.
3128 unsigned getVFScaleFactor() const {
3129 auto *Partial = std::get_if<RdxUnordered>(&Style);
3130 return Partial ? Partial->VFScaleFactor : 1;
3131 }
3132
3133protected:
3134#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3135 /// Print the recipe.
3136 void printRecipe(raw_ostream &O, const Twine &Indent,
3137 VPSlotTracker &SlotTracker) const override;
3138#endif
3139};
3140
3141/// A recipe to represent inloop reduction operations with vector-predication
3142/// intrinsics, performing a reduction on a vector operand with the explicit
3143/// vector length (EVL) into a scalar value, and adding the result to a chain.
3144/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
// NOTE(review): incomplete extraction -- the class-declaration line and the
// constructor's first lines are missing from this span.
3146public:
3149 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3150 R.getFastMathFlags(),
3152 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3153 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3154 DL) {}
3155
3156 ~VPReductionEVLRecipe() override = default;
3157
3159 llvm_unreachable("cloning not implemented yet");
3160 }
3161
3162 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3163
3164 /// Generate the reduction in the loop.
3165 void execute(VPTransformState &State) override;
3166
3167 /// The VPValue of the explicit vector length.
3168 VPValue *getEVL() const { return getOperand(2); }
3169
3170 /// Returns true if the recipe only uses the first lane of operand \p Op.
3171 bool usesFirstLaneOnly(const VPValue *Op) const override {
3173 "Op must be an operand of the recipe");
3174 return Op == getEVL();
3175 }
3176
3177protected:
3178#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3179 /// Print the recipe.
3180 void printRecipe(raw_ostream &O, const Twine &Indent,
3181 VPSlotTracker &SlotTracker) const override;
3182#endif
3183};
3184
3185/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3186/// copies of the original scalar type, one per lane, instead of producing a
3187/// single copy of widened type for all lanes. If the instruction is known to be
3188/// a single scalar, only one copy will be generated.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line and the getMask() declaration line are missing.
3190 public VPIRMetadata {
3191 /// Indicator if only a single replica per lane is needed.
3192 bool IsSingleScalar;
3193
3194 /// Indicator if the replicas are also predicated.
3195 bool IsPredicated;
3196
3197public:
3199 bool IsSingleScalar, VPValue *Mask = nullptr,
3200 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3201 DebugLoc DL = DebugLoc::getUnknown())
3202 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3203 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3204 IsPredicated(Mask) {
3205 setUnderlyingValue(I);
// The optional mask, when present, becomes the last operand.
3206 if (Mask)
3207 addOperand(Mask);
3208 }
3209
3210 ~VPReplicateRecipe() override = default;
3211
3213 auto *Copy = new VPReplicateRecipe(
3214 getUnderlyingInstr(), operands(), IsSingleScalar,
3215 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3216 Copy->transferFlags(*this);
3217 return Copy;
3218 }
3219
3220 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3221
3222 /// Generate replicas of the desired Ingredient. Replicas will be generated
3223 /// for all parts and lanes unless a specific part and lane are specified in
3224 /// the \p State.
3225 void execute(VPTransformState &State) override;
3226
3227 /// Return the cost of this VPReplicateRecipe.
3228 InstructionCost computeCost(ElementCount VF,
3229 VPCostContext &Ctx) const override;
3230
/// Returns true if only a single replica is generated.
3231 bool isSingleScalar() const { return IsSingleScalar; }
3232
/// Returns true if the replicas are predicated by a mask.
3233 bool isPredicated() const { return IsPredicated; }
3234
3235 /// Returns true if the recipe only uses the first lane of operand \p Op.
3236 bool usesFirstLaneOnly(const VPValue *Op) const override {
3238 "Op must be an operand of the recipe");
3239 return isSingleScalar();
3240 }
3241
3242 /// Returns true if the recipe uses scalars of operand \p Op.
3243 bool usesScalars(const VPValue *Op) const override {
3245 "Op must be an operand of the recipe");
3246 return true;
3247 }
3248
3249 /// Return the mask of a predicated VPReplicateRecipe.
3251 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3252 return getOperand(getNumOperands() - 1);
3253 }
3254
/// Return the opcode of the underlying instruction.
3255 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3256
3257protected:
3258#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3259 /// Print the recipe.
3260 void printRecipe(raw_ostream &O, const Twine &Indent,
3261 VPSlotTracker &SlotTracker) const override;
3262#endif
3263};
3264
3265/// A recipe for generating conditional branches on the bits of a mask.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line, clone()'s declaration and the operand-printing
// line inside printRecipe appear to be missing from this span.
3267public:
3269 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3270
3273 }
3274
3275 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3276
3277 /// Generate the extraction of the appropriate bit from the block mask and the
3278 /// conditional branch.
3279 void execute(VPTransformState &State) override;
3280
3281 /// Return the cost of this VPBranchOnMaskRecipe.
3282 InstructionCost computeCost(ElementCount VF,
3283 VPCostContext &Ctx) const override;
3284
3285#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3286 /// Print the recipe.
3287 void printRecipe(raw_ostream &O, const Twine &Indent,
3288 VPSlotTracker &SlotTracker) const override {
3289 O << Indent << "BRANCH-ON-MASK ";
3291 }
3292#endif
3293
3294 /// Returns true if the recipe uses scalars of operand \p Op.
3295 bool usesScalars(const VPValue *Op) const override {
3297 "Op must be an operand of the recipe");
3298 return true;
3299 }
3300};
3301
3302/// A recipe to combine multiple recipes into a single 'expression' recipe,
3303/// which should be considered a single entity for cost-modeling and transforms.
3304/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3305/// expression recipes, before execute. The individual expression recipes are
3306/// completely disconnected from the def-use graph of other recipes not part of
3307/// the expression. Def-use edges between pairs of expression recipes remain
3308/// intact, whereas every edge between an expression recipe and a recipe outside
3309/// the expression is elevated to connect the non-expression recipe with the
3310/// VPExpressionRecipe itself.
// NOTE(review): some interior declaration lines (public constructors, the
// destructor's first line, getOperandOfResultType()'s and computeCost()'s
// declarations) are missing from this extraction.
3311class VPExpressionRecipe : public VPSingleDefRecipe {
3312 /// Recipes included in this VPExpressionRecipe. This could contain
3313 /// duplicates.
3314 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3315
3316 /// Temporary VPValues used for external operands of the expression, i.e.
3317 /// operands not defined by recipes in the expression.
3318 SmallVector<VPValue *> LiveInPlaceholders;
3319
3320 enum class ExpressionTypes {
3321 /// Represents an inloop extended reduction operation, performing a
3322 /// reduction on an extended vector operand into a scalar value, and adding
3323 /// the result to a chain.
3324 ExtendedReduction,
3325 /// Represent an inloop multiply-accumulate reduction, multiplying the
3326 /// extended vector operands, performing a reduction.add on the result, and
3327 /// adding the scalar result to a chain.
3328 ExtMulAccReduction,
3329 /// Represent an inloop multiply-accumulate reduction, multiplying the
3330 /// vector operands, performing a reduction.add on the result, and adding
3331 /// the scalar result to a chain.
3332 MulAccReduction,
3333 /// Represent an inloop multiply-accumulate reduction, multiplying the
3334 /// extended vector operands, negating the multiplication, performing a
3335 /// reduction.add on the result, and adding the scalar result to a chain.
3336 ExtNegatedMulAccReduction,
3337 };
3338
3339 /// Type of the expression.
3340 ExpressionTypes ExpressionType;
3341
3342 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3343 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3344 /// in the expression) are replaced by temporary VPValues and the original
3345 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3346 /// as needed (excluding last) to ensure they are only used by other recipes
3347 /// in the expression.
3348 VPExpressionRecipe(ExpressionTypes ExpressionType,
3349 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3350
3351public:
3353 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3355 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3358 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3359 {Ext0, Ext1, Mul, Red}) {}
3362 VPReductionRecipe *Red)
3363 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3364 {Ext0, Ext1, Mul, Sub, Red}) {
3365 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3366 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3367 "Expected an add reduction");
3368 assert(getNumOperands() >= 3 && "Expected at least three operands");
3369 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3370 assert(SubConst && SubConst->isZero() &&
3371 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3372 }
3373
// Destructor body: deletes each contained recipe exactly once (the list may
// hold duplicates) and then the live-in placeholder values, which are owned
// by this recipe.
3375 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3376 for (auto *R : reverse(ExpressionRecipes)) {
3377 if (ExpressionRecipesSeen.insert(R).second)
3378 delete R;
3379 }
3380 for (VPValue *T : LiveInPlaceholders)
3381 delete T;
3382 }
3383
3384 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3385
3386 VPExpressionRecipe *clone() override {
3387 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3388 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3389 for (auto *R : ExpressionRecipes)
3390 NewExpressiondRecipes.push_back(R->clone());
3391 for (auto *New : NewExpressiondRecipes) {
// Rewire intra-expression def-use edges to point at the cloned recipes.
3392 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3393 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3394 // Update placeholder operands in the cloned recipe to use the external
3395 // operands, to be internalized when the cloned expression is constructed.
3396 for (const auto &[Placeholder, OutsideOp] :
3397 zip(LiveInPlaceholders, operands()))
3398 New->replaceUsesOfWith(Placeholder, OutsideOp);
3399 }
3400 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3401 }
3402
3403 /// Return the VPValue to use to infer the result type of the recipe.
3405 unsigned OpIdx =
3406 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3407 : 1;
3408 return getOperand(getNumOperands() - OpIdx);
3409 }
3410
3411 /// Insert the recipes of the expression back into the VPlan, directly before
3412 /// the current recipe. Leaves the expression recipe empty, which must be
3413 /// removed before codegen.
3414 void decompose();
3415
/// Get the factor that the VF of this expression's result should be scaled
/// by, or 1 if the final recipe is not a reduction.
3416 unsigned getVFScaleFactor() const {
3417 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3418 return PR ? PR->getVFScaleFactor() : 1;
3419 }
3420
3421 /// Method for generating code, must not be called as this recipe is abstract.
3422 void execute(VPTransformState &State) override {
3423 llvm_unreachable("recipe must be removed before execute");
3424 }
3425
/// Return the cost of this VPExpressionRecipe.
3427 VPCostContext &Ctx) const override;
3428
3429 /// Returns true if this expression contains recipes that may read from or
3430 /// write to memory.
3431 bool mayReadOrWriteMemory() const;
3432
3433 /// Returns true if this expression contains recipes that may have side
3434 /// effects.
3435 bool mayHaveSideEffects() const;
3436
3437 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3438 bool isSingleScalar() const;
3439
3440protected:
3441#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3442 /// Print the recipe.
3443 void printRecipe(raw_ostream &O, const Twine &Indent,
3444 VPSlotTracker &SlotTracker) const override;
3445#endif
3446};
3447
3448/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3449/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3450/// order to merge values that are set under such a branch and feed their uses.
3451/// The phi nodes can be scalar or vector depending on the users of the value.
3452/// This recipe works in concert with VPBranchOnMaskRecipe.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line and the clone()/computeCost() declaration lines
// are missing from this span.
3454public:
3455 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs phi
3456 /// nodes after merging back from a Branch-on-Mask.
3458 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
3459 ~VPPredInstPHIRecipe() override = default;
3460
3462 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3463 }
3464
3465 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3466
3467 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3468 /// retain SSA form.
3469 void execute(VPTransformState &State) override;
3470
3471 /// Return the cost of this VPPredInstPHIRecipe.
3473 VPCostContext &Ctx) const override {
3474 // TODO: Compute accurate cost after retiring the legacy cost model.
3475 return 0;
3476 }
3477
3478 /// Returns true if the recipe uses scalars of operand \p Op.
3479 bool usesScalars(const VPValue *Op) const override {
3481 "Op must be an operand of the recipe");
3482 return true;
3483 }
3484
3485protected:
3486#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3487 /// Print the recipe.
3488 void printRecipe(raw_ostream &O, const Twine &Indent,
3489 VPSlotTracker &SlotTracker) const override;
3490#endif
3491};
3492
3493/// A common base class for widening memory operations. An optional mask can be
3494/// provided as the last operand.
// NOTE(review): incomplete extraction -- the class-declaration line and the
// member-declaration lines for the documented Ingredient/Alignment/Consecutive
// members are missing from this span (only their doc comments survive).
3496 public VPIRMetadata {
3497protected:
3499
3500 /// Alignment information for this memory access.
3502
3503 /// Whether the accessed addresses are consecutive.
3505
3506 /// Whether the memory access is masked.
3507 bool IsMasked = false;
3508
/// Append \p Mask as the last operand, if non-null. May be called at most
/// once per recipe.
3509 void setMask(VPValue *Mask) {
3510 assert(!IsMasked && "cannot re-set mask");
3511 if (!Mask)
3512 return;
3513 addOperand(Mask);
3514 IsMasked = true;
3515 }
3516
3517 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3518 std::initializer_list<VPValue *> Operands,
3519 bool Consecutive, const VPIRMetadata &Metadata,
3520 DebugLoc DL)
3521 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3523
3524public:
3526 llvm_unreachable("cloning not supported");
3527 }
3528
3529 static inline bool classof(const VPRecipeBase *R) {
3530 return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3531 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3532 R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3533 R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3534 }
3535
3536 static inline bool classof(const VPUser *U) {
3537 auto *R = dyn_cast<VPRecipeBase>(U);
3538 return R && classof(R);
3539 }
3540
3541 /// Return whether the loaded-from / stored-to addresses are consecutive.
3542 bool isConsecutive() const { return Consecutive; }
3543
3544 /// Return the address accessed by this recipe.
3545 VPValue *getAddr() const { return getOperand(0); }
3546
3547 /// Returns true if the recipe is masked.
3548 bool isMasked() const { return IsMasked; }
3549
3550 /// Return the mask used by this recipe. Note that a full mask is represented
3551 /// by a nullptr.
3552 VPValue *getMask() const {
3553 // Mask is optional and therefore the last operand.
3554 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3555 }
3556
3557 /// Returns the alignment of the memory access.
3558 Align getAlign() const { return Alignment; }
3559
3560 /// Generate the wide load/store.
3561 void execute(VPTransformState &State) override {
3562 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3563 }
3564
3565 /// Return the cost of this VPWidenMemoryRecipe.
3566 InstructionCost computeCost(ElementCount VF,
3567 VPCostContext &Ctx) const override;
3568
3570};
3571
3572/// A recipe for widening load operations, using the address to load from and an
3573/// optional mask.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line and clone()'s first lines are missing.
3575 public VPRecipeValue {
3577 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3578 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3579 Consecutive, Metadata, DL),
3580 VPRecipeValue(this, &Load) {
3581 setMask(Mask);
3582 }
3583
3586 getMask(), Consecutive, *this, getDebugLoc());
3587 }
3588
3589 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3590
3591 /// Generate a wide load or gather.
3592 void execute(VPTransformState &State) override;
3593
3594 /// Returns true if the recipe only uses the first lane of operand \p Op.
3595 bool usesFirstLaneOnly(const VPValue *Op) const override {
3597 "Op must be an operand of the recipe");
3598 // Widened, consecutive load operations only demand the first lane of
3599 // their address.
3600 return Op == getAddr() && isConsecutive();
3601 }
3602
3603protected:
3604#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3605 /// Print the recipe.
3606 void printRecipe(raw_ostream &O, const Twine &Indent,
3607 VPSlotTracker &SlotTracker) const override;
3608#endif
3609};
3610
3611/// A recipe for widening load operations with vector-predication intrinsics,
3612/// using the address to load from, the explicit vector length and an optional
3613/// mask.
// NOTE(review): incomplete extraction -- the class-declaration line and the
// constructor's first line are missing from this span.
3615 public VPRecipeValue {
3617 VPValue *Mask)
3618 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
3619 {Addr, &EVL}, L.isConsecutive(), L,
3620 L.getDebugLoc()),
3621 VPRecipeValue(this, &getIngredient()) {
3622 setMask(Mask);
3623 }
3624
3625 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3626
3627 /// Return the EVL operand.
3628 VPValue *getEVL() const { return getOperand(1); }
3629
3630 /// Generate the wide load or gather.
3631 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3632
3633 /// Return the cost of this VPWidenLoadEVLRecipe.
3635 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3636
3637 /// Returns true if the recipe only uses the first lane of operand \p Op.
3638 bool usesFirstLaneOnly(const VPValue *Op) const override {
3640 "Op must be an operand of the recipe");
3641 // Widened loads only demand the first lane of EVL and consecutive loads
3642 // only demand the first lane of their address.
3643 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3644 }
3645
3646protected:
3647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3648 /// Print the recipe.
3649 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3650 VPSlotTracker &SlotTracker) const override;
3651#endif
3652};
3653
3654/// A recipe for widening store operations, using the stored value, the address
3655/// to store to and an optional mask.
// NOTE(review): incomplete extraction -- the class-declaration line and
// clone()'s first lines are missing from this span.
3657 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3658 VPValue *Mask, bool Consecutive,
3659 const VPIRMetadata &Metadata, DebugLoc DL)
3660 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3661 {Addr, StoredVal}, Consecutive, Metadata, DL) {
3662 setMask(Mask);
3663 }
3664
3668 *this, getDebugLoc());
3669 }
3670
3671 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3672
3673 /// Return the value stored by this recipe.
3674 VPValue *getStoredValue() const { return getOperand(1); }
3675
3676 /// Generate a wide store or scatter.
3677 void execute(VPTransformState &State) override;
3678
3679 /// Returns true if the recipe only uses the first lane of operand \p Op.
3680 bool usesFirstLaneOnly(const VPValue *Op) const override {
3682 "Op must be an operand of the recipe");
3683 // Widened, consecutive stores only demand the first lane of their address,
3684 // unless the same operand is also stored.
3685 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3686 }
3687
3688protected:
3689#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3690 /// Print the recipe.
3691 void printRecipe(raw_ostream &O, const Twine &Indent,
3692 VPSlotTracker &SlotTracker) const override;
3693#endif
3694};
3695
3696/// A recipe for widening store operations with vector-predication intrinsics,
3697/// using the value to store, the address to store to, the explicit vector
3698/// length and an optional mask.
// NOTE(review): incomplete extraction -- the class-declaration line and the
// constructor's first line are missing from this span.
3701 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3702 : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
3703 {Addr, StoredVal, &EVL}, S.isConsecutive(), S,
3704 S.getDebugLoc()) {
3705 setMask(Mask);
3706 }
3707
3708 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3709
3710 /// Return the value stored by this recipe.
3711 VPValue *getStoredValue() const { return getOperand(1); }
3712
3713 /// Return the EVL operand.
3714 VPValue *getEVL() const { return getOperand(2); }
3715
3716 /// Generate the wide store or scatter.
3717 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3718
3719 /// Return the cost of this VPWidenStoreEVLRecipe.
3721 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3722
3723 /// Returns true if the recipe only uses the first lane of operand \p Op.
3724 bool usesFirstLaneOnly(const VPValue *Op) const override {
3726 "Op must be an operand of the recipe");
3727 if (Op == getEVL()) {
3728 assert(getStoredValue() != Op && "unexpected store of EVL");
3729 return true;
3730 }
3731 // Widened, consecutive memory operations only demand the first lane of
3732 // their address, unless the same operand is also stored. That latter can
3733 // happen with opaque pointers.
3734 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3735 }
3736
3737protected:
3738#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3739 /// Print the recipe.
3740 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3741 VPSlotTracker &SlotTracker) const override;
3742#endif
3743};
3744
3745/// Recipe to expand a SCEV expression.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line and computeCost()'s declaration line are missing.
3747 const SCEV *Expr;
3748
3749public:
3751 : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}
3752
3753 ~VPExpandSCEVRecipe() override = default;
3754
3755 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3756
3757 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3758
// Must never be reached: the recipe is replaced by the expanded value before
// final code generation.
3759 void execute(VPTransformState &State) override {
3760 llvm_unreachable("SCEV expressions must be expanded before final execute");
3761 }
3762
3763 /// Return the cost of this VPExpandSCEVRecipe.
3765 VPCostContext &Ctx) const override {
3766 // TODO: Compute accurate cost after retiring the legacy cost model.
3767 return 0;
3768 }
3769
/// Return the SCEV expression to expand.
3770 const SCEV *getSCEV() const { return Expr; }
3771
3772protected:
3773#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3774 /// Print the recipe.
3775 void printRecipe(raw_ostream &O, const Twine &Indent,
3776 VPSlotTracker &SlotTracker) const override;
3777#endif
3778};
3779
3780/// A recipe for generating the active lane mask for the vector loop that is
3781/// used to predicate the vector operations.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line and clone()'s first lines (defining `R`) are
// missing from this span.
3783public:
3785 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3786 StartMask, DL) {}
3787
3788 ~VPActiveLaneMaskPHIRecipe() override = default;
3789
// Carry over the optional second operand, when present, to the clone.
3792 if (getNumOperands() == 2)
3793 R->addOperand(getOperand(1));
3794 return R;
3795 }
3796
3797 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3798
3799 /// Generate the active lane mask phi of the vector loop.
3800 void execute(VPTransformState &State) override;
3801
3802protected:
3803#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3804 /// Print the recipe.
3805 void printRecipe(raw_ostream &O, const Twine &Indent,
3806 VPSlotTracker &SlotTracker) const override;
3807#endif
3808};
3809
3810/// A recipe for generating the phi node tracking the current scalar iteration
3811/// index. It starts at the start value of the canonical induction and gets
3812/// incremented by the number of scalar iterations processed by the vector loop
3813/// iteration. The increment does not have to be loop invariant.
// NOTE(review): incomplete extraction -- the class-declaration line, the
// constructor's first line and the clone()/computeCost() declaration lines
// are missing from this span.
3815public:
3817 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3818 StartIV, DL) {}
3819
3820 ~VPCurrentIterationPHIRecipe() override = default;
3821
3823 llvm_unreachable("cloning not implemented yet");
3824 }
3825
3826 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3827
// Must never be reached: this recipe is lowered to a scalar phi recipe
// before code generation.
3828 void execute(VPTransformState &State) override {
3829 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3830 "scalar phi recipe");
3831 }
3832
3833 /// Return the cost of this VPCurrentIterationPHIRecipe.
3835 VPCostContext &Ctx) const override {
3836 // For now, match the behavior of the legacy cost model.
3837 return 0;
3838 }
3839
3840 /// Returns true if the recipe only uses the first lane of operand \p Op.
3841 bool usesFirstLaneOnly(const VPValue *Op) const override {
3843 "Op must be an operand of the recipe");
3844 return true;
3845 }
3846
3847protected:
3848#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3849 /// Print the recipe.
3850 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3851 VPSlotTracker &SlotTracker) const override;
3852#endif
3853};
3854
3855/// A Recipe for widening the canonical induction variable of the vector loop.
3857                                 public VPUnrollPartAccessor<1> {
3858public:
3860      : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3861
3862  ~VPWidenCanonicalIVRecipe() override = default;
3863
3867
3868  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
3869
3870  /// Generate a canonical vector induction variable of the vector loop, with
3871  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3872  /// step = <VF*UF, VF*UF, ..., VF*UF>.
3873  void execute(VPTransformState &State) override;
3874
3875  /// Return the cost of this VPWidenCanonicalIVRecipe.
3877                             VPCostContext &Ctx) const override {
3878    // TODO: Compute accurate cost after retiring the legacy cost model.
3879    return 0;
3880  }
3881
3882  /// Return the canonical IV being widened.
3886
3887protected:
3888#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3889  /// Print the recipe.
3890  void printRecipe(raw_ostream &O, const Twine &Indent,
3891                   VPSlotTracker &SlotTracker) const override;
3892#endif
3893};
3894
3895/// A recipe for converting the input value \p IV to the corresponding
3896/// value of an IV with different start and step values, using Start + IV *
3897/// Step.
3899  /// Kind of the induction.
3901  /// If not nullptr, the floating point induction binary operator. Must be set
3902  /// for floating point inductions.
3903  const FPMathOperator *FPBinOp;
3904
3905public:
  // Convenience constructor: derives kind and FP binary operator from the
  // induction descriptor and delegates to the main constructor below.
3907                    VPValue *CanonicalIV, VPValue *Step)
3909            IndDesc.getKind(),
3910            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3911            Start, CanonicalIV, Step) {}
3912
  // Operands: 0 = Start, 1 = IV (index), 2 = Step — see the getters below.
3914                    const FPMathOperator *FPBinOp, VPIRValue *Start,
3915                    VPValue *IV, VPValue *Step)
3916      : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
3917        Kind(Kind), FPBinOp(FPBinOp) {}
3918
3919  ~VPDerivedIVRecipe() override = default;
3920
3922    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3923                                 getStepValue());
3924  }
3925
3926  VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
3927
  // Placeholder recipe: expected to be expanded into concrete recipes before
  // codegen, so execute() must never be reached.
3928  void execute(VPTransformState &State) override {
3929    llvm_unreachable("Expected prior expansion of this recipe");
3930  }
3931
3932  /// Return the cost of this VPDerivedIVRecipe.
3934                             VPCostContext &Ctx) const override {
3935    // TODO: Compute accurate cost after retiring the legacy cost model.
3936    return 0;
3937  }
3938
  /// The scalar type of the derived IV, taken from the start value.
3939  Type *getScalarType() const { return getStartValue()->getType(); }
3940
3942  VPValue *getIndex() const { return getOperand(1); }
3943  VPValue *getStepValue() const { return getOperand(2); }
3944  const FPMathOperator *getFPBinOp() const { return FPBinOp; }
3946
3947  /// Returns true if the recipe only uses the first lane of operand \p Op.
3948  bool usesFirstLaneOnly(const VPValue *Op) const override {
3950           "Op must be an operand of the recipe");
3951    return true;
3952  }
3953
3954protected:
3955#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3956  /// Print the recipe.
3957  void printRecipe(raw_ostream &O, const Twine &Indent,
3958                   VPSlotTracker &SlotTracker) const override;
3959#endif
3960};
3961
3962/// A recipe for handling phi nodes of integer and floating-point inductions,
3963/// producing their scalar values. Before unrolling by UF the recipe represents
3964/// the VF*UF scalar values to be produced, or UF scalar values if only first
3965/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
3966/// operand StartIndex to all unroll parts except part 0, as the recipe
3967/// represents the VF scalar values (this number of values is taken from
3968/// State.VF rather than from the VF operand) starting at IV + StartIndex.
3970  Instruction::BinaryOps InductionOpcode;
3971
3972public:
  // Operand layout: 0 = IV, 1 = Step, 2 = VF, 3 (optional, added by
  // unrolling) = StartIndex — see the getters below.
3975                        DebugLoc DL)
3976      : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
3977                            FMFs, DL),
3978        InductionOpcode(Opcode) {}
3979
  // Convenience constructor: derives the induction opcode and fast-math flags
  // from the induction descriptor (FMFs only for FP inductions).
3981                        VPValue *Step, VPValue *VF,
3984            IV, Step, VF, IndDesc.getInductionOpcode(),
3985            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3986                ? IndDesc.getInductionBinOp()->getFastMathFlags()
3987                : FastMathFlags(),
3988            DL) {}
3989
3990  ~VPScalarIVStepsRecipe() override = default;
3991
  // Clone preserves the optional StartIndex operand if already present.
3993    auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
3994                                           getOperand(2), InductionOpcode,
3996    if (VPValue *StartIndex = getStartIndex())
3997      NewR->setStartIndex(StartIndex);
3998    return NewR;
3999  }
4000
4001  VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4002
4003  /// Generate the scalarized versions of the phi node as needed by their users.
4004  void execute(VPTransformState &State) override;
4005
4006  /// Return the cost of this VPScalarIVStepsRecipe.
4008                             VPCostContext &Ctx) const override {
4009    // TODO: Compute accurate cost after retiring the legacy cost model.
4010    return 0;
4011  }
4012
4013  VPValue *getStepValue() const { return getOperand(1); }
4014
4015  /// Return the number of scalars to produce per unroll part, used to compute
4016  /// StartIndex during unrolling.
4017  VPValue *getVFValue() const { return getOperand(2); }
4018
4019  /// Return the StartIndex, or null if known to be zero, valid only after
4020  /// unrolling.
4022    return getNumOperands() == 4 ? getOperand(3) : nullptr;
4023  }
4024
4025  /// Set or add the StartIndex operand.
4026  void setStartIndex(VPValue *StartIndex) {
4027    if (getNumOperands() == 4)
4028      setOperand(3, StartIndex);
4029    else
4030      addOperand(StartIndex);
4031  }
4032
4033  /// Returns true if the recipe only uses the first lane of operand \p Op.
4034  bool usesFirstLaneOnly(const VPValue *Op) const override {
4036           "Op must be an operand of the recipe");
4037    return true;
4038  }
4039
4040  Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4041
4042protected:
4043#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4044  /// Print the recipe.
4045  void printRecipe(raw_ostream &O, const Twine &Indent,
4046                   VPSlotTracker &SlotTracker) const override;
4047#endif
4048};
4049
4050/// Support casting from VPRecipeBase -> VPPhiAccessors.
4051template <>
4055  /// Used by isa.
4056  static inline bool isPossible(VPRecipeBase *R) {
4057    // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
4059  }
4060
4061  /// Used by cast.
  // Dispatch on the recipe ID to the concrete phi-like type; the default case
  // covers all remaining header-phi recipes.
4063    switch (R->getVPRecipeID()) {
4064    case VPRecipeBase::VPInstructionSC:
4065      return cast<VPPhi>(R);
4066    case VPRecipeBase::VPIRInstructionSC:
4067      return cast<VPIRPhi>(R);
4068    case VPRecipeBase::VPWidenPHISC:
4069      return cast<VPWidenPHIRecipe>(R);
4070    default:
4071      return cast<VPHeaderPHIRecipe>(R);
4072    }
4073  }
4074
4075  /// Used by inherited doCastIfPossible to dyn_cast.
4076  static inline VPPhiAccessors *castFailed() { return nullptr; }
4077};
4078
  // NOTE(review): the const-variant specialization between the two
  // `template <>` lines below is truncated in this extract — confirm against
  // the full header.
4079template <>
4084template <>
4086    : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4087                               CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4088
4089/// Support casting from VPRecipeBase -> VPIRMetadata.
4090template <>
4094  /// Used by isa.
4095  static inline bool isPossible(VPRecipeBase *R) {
4096    // NOTE: Each recipe inheriting from VPIRMetadata must be listed here.
4101        R);
4102  }
4103
4104  /// Used by cast.
  // Exhaustive dispatch: every recipe ID accepted by isPossible() above must
  // have a case here; anything else is a programming error (unreachable).
4105  static inline VPIRMetadata *doCast(VPRecipeBase *R) {
4106    switch (R->getVPRecipeID()) {
4107    case VPRecipeBase::VPInstructionSC:
4108      return cast<VPInstruction>(R);
4109    case VPRecipeBase::VPWidenSC:
4110      return cast<VPWidenRecipe>(R);
4111    case VPRecipeBase::VPWidenCastSC:
4112      return cast<VPWidenCastRecipe>(R);
4113    case VPRecipeBase::VPWidenIntrinsicSC:
4115    case VPRecipeBase::VPWidenCallSC:
4116      return cast<VPWidenCallRecipe>(R);
4117    case VPRecipeBase::VPReplicateSC:
4118      return cast<VPReplicateRecipe>(R);
4119    case VPRecipeBase::VPInterleaveSC:
4120    case VPRecipeBase::VPInterleaveEVLSC:
4121      return cast<VPInterleaveBase>(R);
4122    case VPRecipeBase::VPWidenLoadSC:
4123    case VPRecipeBase::VPWidenLoadEVLSC:
4124    case VPRecipeBase::VPWidenStoreSC:
4125    case VPRecipeBase::VPWidenStoreEVLSC:
4126      return cast<VPWidenMemoryRecipe>(R);
4127    default:
4128      llvm_unreachable("Illegal recipe for VPIRMetadata cast");
4129    }
4130  }
4131
4132  /// Used by inherited doCastIfPossible to dyn_cast.
4133  static inline VPIRMetadata *castFailed() { return nullptr; }
4134};
4135
  // NOTE(review): the const-variant specialization between the two
  // `template <>` lines below is truncated in this extract — confirm against
  // the full header.
4136template <>
4141template <>
4143    : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4144                                  CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4145
4146/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4147/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4148/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4149class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4150  friend class VPlan;
4151
4152  /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4153  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4154      : VPBlockBase(VPBasicBlockSC, Name.str()) {
4155    if (Recipe)
4156      appendRecipe(Recipe);
4157  }
4158
4159public:
4161
4162protected:
4163  /// The VPRecipes held in the order of output instructions to generate.
4165
4166  VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4167      : VPBlockBase(BlockSC, Name.str()) {}
4168
4169public:
  // Destructor empties the recipe list explicitly before the list itself is
  // destroyed.
4170  ~VPBasicBlock() override {
4171    while (!Recipes.empty())
4172      Recipes.pop_back();
4173  }
4174
4175  /// Instruction iterators...
4180
4181  //===--------------------------------------------------------------------===//
4182  /// Recipe iterator methods
4183  ///
4184  inline iterator begin() { return Recipes.begin(); }
4185  inline const_iterator begin() const { return Recipes.begin(); }
4186  inline iterator end() { return Recipes.end(); }
4187  inline const_iterator end() const { return Recipes.end(); }
4188
4189  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4190  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4191  inline reverse_iterator rend() { return Recipes.rend(); }
4192  inline const_reverse_iterator rend() const { return Recipes.rend(); }
4193
4194  inline size_t size() const { return Recipes.size(); }
4195  inline bool empty() const { return Recipes.empty(); }
4196  inline const VPRecipeBase &front() const { return Recipes.front(); }
4197  inline VPRecipeBase &front() { return Recipes.front(); }
4198  inline const VPRecipeBase &back() const { return Recipes.back(); }
4199  inline VPRecipeBase &back() { return Recipes.back(); }
4200
4201  /// Returns a reference to the list of recipes.
4203
4204  /// Returns a pointer to a member of the recipe list.
4205  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4206    return &VPBasicBlock::Recipes;
4207  }
4208
4209  /// Method to support type inquiry through isa, cast, and dyn_cast.
4210  static inline bool classof(const VPBlockBase *V) {
4211    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4212           V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4213  }
4214
  /// Insert \p Recipe before \p InsertPt, taking ownership: the recipe must
  /// not already belong to a block.
4215  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4216    assert(Recipe && "No recipe to append.");
4217    assert(!Recipe->Parent && "Recipe already in VPlan");
4218    Recipe->Parent = this;
4219    Recipes.insert(InsertPt, Recipe);
4220  }
4221
4222  /// Augment the existing recipes of a VPBasicBlock with an additional
4223  /// \p Recipe as the last recipe.
4224  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4225
4226  /// The method which generates the output IR instructions that correspond to
4227  /// this VPBasicBlock, thereby "executing" the VPlan.
4228  void execute(VPTransformState *State) override;
4229
4230  /// Return the cost of this VPBasicBlock.
4231  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4232
4233  /// Return the position of the first non-phi node recipe in the block.
4234  iterator getFirstNonPhi();
4235
4236  /// Returns an iterator range over the PHI-like recipes in the block.
4240
4241  /// Split current block at \p SplitAt by inserting a new block between the
4242  /// current block and its successors and moving all recipes starting at
4243  /// SplitAt to the new block. Returns the new block.
4244  VPBasicBlock *splitAt(iterator SplitAt);
4245
4246  VPRegionBlock *getEnclosingLoopRegion();
4247  const VPRegionBlock *getEnclosingLoopRegion() const;
4248
4249#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4250  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4251  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4252  ///
4253  /// Note that the numbering is applied to the whole VPlan, so printing
4254  /// individual blocks is consistent with the whole VPlan printing.
4255  void print(raw_ostream &O, const Twine &Indent,
4256             VPSlotTracker &SlotTracker) const override;
4257  using VPBlockBase::print; // Get the print(raw_stream &O) version.
4258#endif
4259
4260  /// If the block has multiple successors, return the branch recipe terminating
4261  /// the block. If there are no or only a single successor, return nullptr;
4262  VPRecipeBase *getTerminator();
4263  const VPRecipeBase *getTerminator() const;
4264
4265  /// Returns true if the block is exiting its parent region.
4266  bool isExiting() const;
4267
4268  /// Clone the current block and its recipes, without updating the operands of
4269  /// the cloned recipes.
4270  VPBasicBlock *clone() override;
4271
4272  /// Returns the predecessor block at index \p Idx with the predecessors as per
4273  /// the corresponding plain CFG. If the block is an entry block to a region,
4274  /// the first predecessor is the single predecessor of a region, and the
4275  /// second predecessor is the exiting block of the region.
4276  const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4277
4278protected:
4279  /// Execute the recipes in the IR basic block \p BB.
4280  void executeRecipes(VPTransformState *State, BasicBlock *BB);
4281
4282  /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4283  /// generated for this VPBB.
4284  void connectToPredecessors(VPTransformState &State);
4285
4286private:
4287  /// Create an IR BasicBlock to hold the output instructions generated by this
4288  /// VPBasicBlock, and return it. Update the CFGState accordingly.
4289  BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4290};
4291
// NOTE(review): the function-name line of this out-of-line definition is
// truncated in this extract; the body forwards to the containing block's
// plain-CFG predecessor lookup — confirm the signature against the full
// header.
4292inline const VPBasicBlock *
4294  return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4295}
4296
4297/// A special type of VPBasicBlock that wraps an existing IR basic block.
4298/// Recipes of the block get added before the first non-phi instruction in the
4299/// wrapped block.
4300/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4301/// preheader block.
4302class VPIRBasicBlock : public VPBasicBlock {
4303  friend class VPlan;
4304
  /// The wrapped IR basic block; not owned by this VPIRBasicBlock.
4305  BasicBlock *IRBB;
4306
4307  /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
  // The block name is derived from the IR block: "ir-bb<name>".
4308  VPIRBasicBlock(BasicBlock *IRBB)
4309      : VPBasicBlock(VPIRBasicBlockSC,
4310                     (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4311        IRBB(IRBB) {}
4312
4313public:
4314  ~VPIRBasicBlock() override = default;
4315
  /// Method to support type inquiry through isa, cast, and dyn_cast.
4316  static inline bool classof(const VPBlockBase *V) {
4317    return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4318  }
4319
4320  /// The method which generates the output IR instructions that correspond to
4321  /// this VPBasicBlock, thereby "executing" the VPlan.
4322  void execute(VPTransformState *State) override;
4323
  /// Clone this block (covariant override of VPBasicBlock::clone).
4324  VPIRBasicBlock *clone() override;
4325
  /// Return the wrapped IR basic block.
4326  BasicBlock *getIRBasicBlock() const { return IRBB; }
4327};
4328
4329/// Track information about the canonical IV value of a region.
4330/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4332  /// VPRegionValue for the canonical IV, whose allocation is managed by
4333  /// VPCanonicalIVInfo.
4334  std::unique_ptr<VPRegionValue> CanIV;
4335
4336  /// Whether the increment of the canonical IV may unsigned wrap or not.
4337  bool HasNUW = true;
4338
4339public:
  // NOTE(review): the constructor signature line is truncated in this extract;
  // it presumably takes (Type *Ty, DebugLoc DL, VPRegionBlock *Region) given
  // the delegating initializer below — confirm against the full header.
4341      : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4342
  /// Accessors for the owned canonical-IV region value.
4343  VPRegionValue *getRegionValue() { return CanIV.get(); }
4344  const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4345
4346  bool hasNUW() const { return HasNUW; }
4347
  /// Mark the canonical IV increment as possibly wrapping (drops NUW).
4348  void clearNUW() { HasNUW = false; }
4349};
4350
4351/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4352/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4353/// A VPRegionBlock may indicate that its contents are to be replicated several
4354/// times. This is designed to support predicated scalarization, in which a
4355/// scalar if-then code structure needs to be generated VF * UF times. Having
4356/// this replication indicator helps to keep a single model for multiple
4357/// candidate VF's. The actual replication takes place only once the desired VF
4358/// and UF have been determined.
4359class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4360  friend class VPlan;
4361
4362  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4363  VPBlockBase *Entry;
4364
4365  /// Hold the Single Exiting block of the SESE region modelled by the
4366  /// VPRegionBlock.
4367  VPBlockBase *Exiting;
4368
4369  /// Holds the Canonical IV of the loop region along with additional
4370  /// information. If CanIVInfo is nullptr, the region is a replicating region.
4371  /// Loop regions retain their canonical IVs until they are dissolved, even if
4372  /// the canonical IV has no users.
4373  std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4374
4375  /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4376  /// VPRegionBlocks.
  // Entry/Exiting may both be null (deferred wiring); if Entry is given,
  // Exiting must be too, and both are re-parented to this region.
4377  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4378                const std::string &Name = "")
4379      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4380    if (Entry) {
4381      assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4382      assert(Exiting && "Must also pass Exiting if Entry is passed.");
4383      assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4384      Entry->setParent(this);
4385      Exiting->setParent(this);
4386    }
4387  }
4388
  // Loop-region constructor: additionally creates the canonical-IV info,
  // which is what distinguishes a loop region from a replicating one.
4389  VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4390                VPBlockBase *Exiting, const std::string &Name = "")
4391      : VPRegionBlock(Entry, Exiting, Name) {
4392    CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4393  }
4394
4395public:
4396  ~VPRegionBlock() override = default;
4397
4398  /// Method to support type inquiry through isa, cast, and dyn_cast.
4399  static inline bool classof(const VPBlockBase *V) {
4400    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4401  }
4402
4403  const VPBlockBase *getEntry() const { return Entry; }
4404  VPBlockBase *getEntry() { return Entry; }
4405
4406  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4407  /// EntryBlock must have no predecessors.
4408  void setEntry(VPBlockBase *EntryBlock) {
4409    assert(!EntryBlock->hasPredecessors() &&
4410           "Entry block cannot have predecessors.");
4411    Entry = EntryBlock;
4412    EntryBlock->setParent(this);
4413  }
4414
4415  const VPBlockBase *getExiting() const { return Exiting; }
4416  VPBlockBase *getExiting() { return Exiting; }
4417
4418  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4419  /// ExitingBlock must have no successors.
4420  void setExiting(VPBlockBase *ExitingBlock) {
4421    assert(!ExitingBlock->hasSuccessors() &&
4422           "Exit block cannot have successors.");
4423    Exiting = ExitingBlock;
4424    ExitingBlock->setParent(this);
4425  }
4426
4427  /// Returns the pre-header VPBasicBlock of the loop region.
4429    assert(!isReplicator() && "should only get pre-header of loop regions");
4430    return getSinglePredecessor()->getExitingBasicBlock();
4431  }
4432
4433  /// An indicator whether this region is to generate multiple replicated
4434  /// instances of output IR corresponding to its VPBlockBases.
4435  bool isReplicator() const { return !CanIVInfo; }
4436
4437  /// The method which generates the output IR instructions that correspond to
4438  /// this VPRegionBlock, thereby "executing" the VPlan.
4439  void execute(VPTransformState *State) override;
4440
4441  /// Return the cost of this region.
4442  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4443
4444#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4445  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4446  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4447  /// consecutive numbers.
4448  ///
4449  /// Note that the numbering is applied to the whole VPlan, so printing
4450  /// individual regions is consistent with the whole VPlan printing.
4451  void print(raw_ostream &O, const Twine &Indent,
4452             VPSlotTracker &SlotTracker) const override;
4453  using VPBlockBase::print; // Get the print(raw_stream &O) version.
4454#endif
4455
4456  /// Clone all blocks in the single-entry single-exit region of the block and
4457  /// their recipes without updating the operands of the cloned recipes.
4458  VPRegionBlock *clone() override;
4459
4460  /// Remove the current region from its VPlan, connecting its predecessor to
4461  /// its entry, and its exiting block to its successor.
4462  void dissolveToCFGLoop();
4463
4464  /// Get the canonical IV increment instruction if it exists. Otherwise, create
4465  /// a new increment before the terminator and return it. The canonical IV
4466  /// increment is subject to DCE if unused, unlike the canonical IV itself.
4467  VPInstruction *getOrCreateCanonicalIVIncrement();
4468
4469  /// Return the canonical induction variable of the region, null for
4470  /// replicating regions.
4472    return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4473  }
4475    return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4476  }
4477
4478  /// Return the type of the canonical IV for loop regions.
  // Precondition: loop region only — CanIVInfo is dereferenced unchecked.
4480    return CanIVInfo->getRegionValue()->getType();
4481  }
4482
4483  /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4484  bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4485
4486  /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4488    assert(Increment && "Must provide increment to clear");
4489    Increment->dropPoisonGeneratingFlags();
4490    CanIVInfo->clearNUW();
4491  }
4492};
4493
// NOTE(review): the signature lines of these two out-of-line definitions are
// truncated in this extract; both bodies walk two parent links (recipe ->
// block -> region, presumably a non-const/const pair) — confirm the
// signatures against the full header.
4495  return getParent()->getParent();
4496}
4497
4499  return getParent()->getParent();
4500}
4501
4502/// VPlan models a candidate for vectorization, encoding various decisions take
4503/// to produce efficient output IR, including which branches, basic-blocks and
4504/// output IR instructions to generate, and their cost. VPlan holds a
4505/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4506/// VPBasicBlock.
4507class VPlan {
4508 friend class VPlanPrinter;
4509 friend class VPSlotTracker;
4510
4511 /// VPBasicBlock corresponding to the original preheader. Used to place
4512 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4513 /// rest of VPlan execution.
4514 /// When this VPlan is used for the epilogue vector loop, the entry will be
4515 /// replaced by a new entry block created during skeleton creation.
4516 VPBasicBlock *Entry;
4517
4518 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4519 VPIRBasicBlock *ScalarHeader;
4520
4521 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4522 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4523 /// e.g. if the scalar epilogue always executes.
4525
4526 /// Holds the VFs applicable to this VPlan.
4528
4529 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4530 /// any UF.
4532
4533 /// Holds the name of the VPlan, for printing.
4534 std::string Name;
4535
4536 /// Represents the trip count of the original loop, for folding
4537 /// the tail.
4538 VPValue *TripCount = nullptr;
4539
4540 /// Represents the backedge taken count of the original loop, for folding
4541 /// the tail. It equals TripCount - 1.
4542 VPSymbolicValue *BackedgeTakenCount = nullptr;
4543
4544 /// Represents the vector trip count.
4545 VPSymbolicValue VectorTripCount;
4546
4547 /// Represents the vectorization factor of the loop.
4548 VPSymbolicValue VF;
4549
4550 /// Represents the unroll factor of the loop.
4551 VPSymbolicValue UF;
4552
4553 /// Represents the loop-invariant VF * UF of the vector loop region.
4554 VPSymbolicValue VFxUF;
4555
4556 /// Contains all the external definitions created for this VPlan, as a mapping
4557 /// from IR Values to VPIRValues.
4559
4560 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4561 /// VPlan is destroyed.
4562 SmallVector<VPBlockBase *> CreatedBlocks;
4563
4564 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4565 /// wrapping the original header of the scalar loop.
4566 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
4567 : Entry(Entry), ScalarHeader(ScalarHeader) {
4568 Entry->setPlan(this);
4569 assert(ScalarHeader->getNumSuccessors() == 0 &&
4570 "scalar header must be a leaf node");
4571 }
4572
4573public:
4574 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4575 /// original preheader and scalar header of \p L, to be used as entry and
4576 /// scalar header blocks of the new VPlan.
4577 VPlan(Loop *L);
4578
4579 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4580 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
4581 VPlan(BasicBlock *ScalarHeaderBB) {
4582 setEntry(createVPBasicBlock("preheader"));
4583 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4584 }
4585
4587
4589 Entry = VPBB;
4590 VPBB->setPlan(this);
4591 }
4592
4593 /// Generate the IR code for this VPlan.
4594 void execute(VPTransformState *State);
4595
4596 /// Return the cost of this plan.
4598
4599 VPBasicBlock *getEntry() { return Entry; }
4600 const VPBasicBlock *getEntry() const { return Entry; }
4601
4602 /// Returns the preheader of the vector loop region, if one exists, or null
4603 /// otherwise.
4605 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4606 return VectorRegion
4607 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4608 : nullptr;
4609 }
4610
4611 /// Returns the VPRegionBlock of the vector loop.
4614
4615 /// Returns the 'middle' block of the plan, that is the block that selects
4616 /// whether to execute the scalar tail loop or the exit block from the loop
4617 /// latch. If there is an early exit from the vector loop, the middle block
4618 /// conceptully has the early exit block as third successor, split accross 2
4619 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4620 /// tail loop or the exit block. If the scalar tail loop or exit block are
4621 /// known to always execute, the middle block may branch directly to that
4622 /// block. This function cannot be called once the vector loop region has been
4623 /// removed.
4625 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4626 assert(
4627 LoopRegion &&
4628 "cannot call the function after vector loop region has been removed");
4629 // The middle block is always the last successor of the region.
4630 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4631 }
4632
4634 return const_cast<VPlan *>(this)->getMiddleBlock();
4635 }
4636
4637 /// Return the VPBasicBlock for the preheader of the scalar loop.
4640 getScalarHeader()->getSinglePredecessor());
4641 }
4642
4643 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4644 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4645
4646 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4647 /// the original scalar loop.
4648 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4649
4650 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4651 /// exit block.
4653
4654 /// Returns true if \p VPBB is an exit block.
4655 bool isExitBlock(VPBlockBase *VPBB);
4656
4657 /// The trip count of the original loop.
4659 assert(TripCount && "trip count needs to be set before accessing it");
4660 return TripCount;
4661 }
4662
4663 /// Set the trip count assuming it is currently null; if it is not - use
4664 /// resetTripCount().
4665 void setTripCount(VPValue *NewTripCount) {
4666 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4667 TripCount = NewTripCount;
4668 }
4669
4670 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4671 /// the original trip count have been replaced.
4672 void resetTripCount(VPValue *NewTripCount) {
4673 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4674 "TripCount must be set when resetting");
4675 TripCount = NewTripCount;
4676 }
4677
4678 /// The backedge taken count of the original loop.
4680 if (!BackedgeTakenCount)
4681 BackedgeTakenCount = new VPSymbolicValue();
4682 return BackedgeTakenCount;
4683 }
4684 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4685
4686 /// The vector trip count.
4687 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4688
4689 /// Returns the VF of the vector loop region.
4690 VPSymbolicValue &getVF() { return VF; };
4691 const VPSymbolicValue &getVF() const { return VF; };
4692
4693 /// Returns the UF of the vector loop region.
4694 VPSymbolicValue &getUF() { return UF; };
4695
4696 /// Returns VF * UF of the vector loop region.
4697 VPSymbolicValue &getVFxUF() { return VFxUF; }
4698
4701 }
4702
4703 const DataLayout &getDataLayout() const {
4705 }
4706
4707 void addVF(ElementCount VF) { VFs.insert(VF); }
4708
4710 assert(hasVF(VF) && "Cannot set VF not already in plan");
4711 VFs.clear();
4712 VFs.insert(VF);
4713 }
4714
4715 /// Remove \p VF from the plan.
4717 assert(hasVF(VF) && "tried to remove VF not present in plan");
4718 VFs.remove(VF);
4719 }
4720
4721 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4722 bool hasScalableVF() const {
4723 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4724 }
4725
4726 /// Returns an iterator range over all VFs of the plan.
4729 return VFs;
4730 }
4731
4732 /// Returns the single VF of the plan, asserting that the plan has exactly
4733 /// one VF.
4735 assert(VFs.size() == 1 && "expected plan with single VF");
4736 return VFs[0];
4737 }
4738
4739 bool hasScalarVFOnly() const {
4740 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4741 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4742 "Plan with scalar VF should only have a single VF");
4743 return HasScalarVFOnly;
4744 }
4745
4746 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4747
4748 /// Returns the concrete UF of the plan, after unrolling.
4749 unsigned getConcreteUF() const {
4750 assert(UFs.size() == 1 && "Expected a single UF");
4751 return UFs[0];
4752 }
4753
4754 void setUF(unsigned UF) {
4755 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4756 UFs.clear();
4757 UFs.insert(UF);
4758 }
4759
4760 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4761 /// concrete UF.
4762 bool isUnrolled() const { return UFs.size() == 1; }
4763
4764 /// Return a string with the name of the plan and the applicable VFs and UFs.
4765 std::string getName() const;
4766
4767 void setName(const Twine &newName) { Name = newName.str(); }
4768
4769 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4770 /// yet) for \p V.
4772 assert(V && "Trying to get or add the VPIRValue of a null Value");
4773 auto [It, Inserted] = LiveIns.try_emplace(V);
4774 if (Inserted) {
4775 if (auto *CI = dyn_cast<ConstantInt>(V))
4776 It->second = new VPConstantInt(CI);
4777 else
4778 It->second = new VPIRValue(V);
4779 }
4780
4781 assert(isa<VPIRValue>(It->second) &&
4782 "Only VPIRValues should be in mapping");
4783 return It->second;
4784 }
4786 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4787 return getOrAddLiveIn(V->getValue());
4788 }
4789
4790 /// Return a VPIRValue wrapping i1 true.
/// May add a new live-in to the plan via getConstantInt().
4791 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4792
4793 /// Return a VPIRValue wrapping i1 false.
/// May add a new live-in to the plan via getConstantInt().
4794 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4795
4796 /// Return a VPIRValue wrapping the null value of type \p Ty.
/// Delegates to getConstantInt(), so \p Ty is expected to be an integer
/// type — TODO confirm against callers.
4797 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4798
4799 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4801 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4802 }
4803
4804 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
/// The wrapped constant is registered as a live-in of the plan if not
/// already present.
4805 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4806 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4807 }
4808
4809 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4810 /// value.
4812 bool IsSigned = false) {
4813 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4814 }
4815
4816 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4818 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4819 }
4820
4821 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4822 /// otherwise.
/// Unlike getOrAddLiveIn(), this is a pure lookup and never creates a new
/// live-in.
4823 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4824
4825 /// Return a range over the live-in VPIRValues available in the VPlan,
/// i.e. the mapped values of the LiveIns map.
4826 auto getLiveIns() const { return LiveIns.values(); }
4827
4828#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4829 /// Print the live-ins of this VPlan to \p O.
4830 void printLiveIns(raw_ostream &O) const;
4831
4832 /// Print this VPlan to \p O.
4833 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4834
4835 /// Print this VPlan in DOT format to \p O.
4836 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4837
4838 /// Dump the plan to stderr (for debugging).
4839 LLVM_DUMP_METHOD void dump() const;
4840#endif
4841
4842 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4843 /// recipes to refer to the clones, and return it.
4845
4846 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4847 /// present. The returned block is owned by the VPlan and deleted once the
4848 /// VPlan is destroyed.
4850 VPRecipeBase *Recipe = nullptr) {
4851 auto *VPB = new VPBasicBlock(Name, Recipe);
4852 CreatedBlocks.push_back(VPB);
4853 return VPB;
4854 }
4855
4856 /// Create a new loop region with a canonical IV using \p CanIVTy and
4857 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
4858 /// to \p Entry and \p Exiting respectively, if provided. The returned block
4859 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4861 const std::string &Name = "",
4862 VPBlockBase *Entry = nullptr,
4863 VPBlockBase *Exiting = nullptr) {
4864 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
4865 CreatedBlocks.push_back(VPB);
4866 return VPB;
4867 }
4868
4869 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4870 /// returned block is owned by the VPlan and deleted once the VPlan is
4871 /// destroyed.
4873 const std::string &Name = "") {
4874 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
4875 CreatedBlocks.push_back(VPB);
4876 return VPB;
4877 }
4878
4879 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4880 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4881 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4883
4884 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4885 /// instructions in \p IRBB, except its terminator which is managed by the
4886 /// successors of the block in VPlan. The returned block is owned by the VPlan
4887 /// and deleted once the VPlan is destroyed.
4889
4890 /// Returns true if the VPlan is based on a loop with an early exit. That is
4891 /// the case if the VPlan has either more than one exit block or a single exit
4892 /// block with multiple predecessors (one for the exit via the latch and one
4893 /// via the other early exit).
4894 bool hasEarlyExit() const {
// Only exit blocks that have predecessors are actually reachable.
4895 return count_if(ExitBlocks,
4896 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4897 1 ||
4898 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4899 }
4900
4901 /// Returns true if the scalar tail may execute after the vector loop, i.e.
4902 /// if the middle block is a predecessor of the scalar preheader. Note that
4903 /// this relies on unneeded branches to the scalar tail loop being removed.
4904 bool hasScalarTail() const {
// The scalar preheader may be null, in which case there is no scalar tail.
4905 auto *ScalarPH = getScalarPreheader();
4906 return ScalarPH &&
4907 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
4908 }
4909};
4910
4911#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream a textual dump of \p Plan to \p OS. Only available when NDEBUG is
/// not defined or LLVM_ENABLE_DUMP is set (see enclosing #if).
4912 inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4913 Plan.print(OS);
4914 return OS;
4915 }
4916#endif
4917
4918} // end namespace llvm
4919
4920#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:586
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:221
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3790
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3784
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4149
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4177
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4224
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4179
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4176
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4202
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4160
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4166
iterator end()
Definition VPlan.h:4186
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4184
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4178
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4237
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:754
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:233
~VPBasicBlock() override
Definition VPlan.h:4170
const_reverse_iterator rbegin() const
Definition VPlan.h:4190
reverse_iterator rend()
Definition VPlan.h:4191
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4164
VPRecipeBase & back()
Definition VPlan.h:4199
const VPRecipeBase & front() const
Definition VPlan.h:4196
const_iterator begin() const
Definition VPlan.h:4185
VPRecipeBase & front()
Definition VPlan.h:4197
const VPRecipeBase & back() const
Definition VPlan.h:4198
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4215
bool empty() const
Definition VPlan.h:4195
const_iterator end() const
Definition VPlan.h:4187
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4210
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4205
reverse_iterator rbegin()
Definition VPlan.h:4189
friend class VPlan
Definition VPlan.h:4150
size_t size() const
Definition VPlan.h:4194
const_reverse_iterator rend() const
Definition VPlan.h:4192
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2806
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2811
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2781
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2801
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2822
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2788
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2817
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2797
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:97
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:318
VPRegionBlock * getParent()
Definition VPlan.h:189
VPBlocksTy & getPredecessors()
Definition VPlan.h:226
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:223
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:388
void setName(const Twine &newName)
Definition VPlan.h:182
size_t getNumSuccessors() const
Definition VPlan.h:240
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:222
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:220
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:340
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:652
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:176
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:276
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:353
size_t getNumPredecessors() const
Definition VPlan.h:241
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:309
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:225
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:346
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:218
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:225
virtual VPBlockBase * clone()=0
Clone the current block and it's recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:174
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:197
const VPRegionBlock * getParent() const
Definition VPlan.h:190
const std::string & getName() const
Definition VPlan.h:180
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:328
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:266
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:300
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:236
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:260
void clearPredecessors()
Remove all the predecessor of this block.
Definition VPlan.h:325
friend class VPBlockUtils
Definition VPlan.h:98
unsigned getVPBlockID() const
Definition VPlan.h:187
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:367
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:332
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:166
VPBlocksTy & getSuccessors()
Definition VPlan.h:215
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:217
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:289
void setParent(VPRegionBlock *P)
Definition VPlan.h:200
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:282
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:230
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:214
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3287
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3271
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3295
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3268
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4343
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4340
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4344
bool hasNUW() const
Definition VPlan.h:4346
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3822
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3816
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3834
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3828
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3841
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:3945
VPValue * getIndex() const
Definition VPlan.h:3942
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:3944
VPIRValue * getStartValue() const
Definition VPlan.h:3941
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3933
VPValue * getStepValue() const
Definition VPlan.h:3943
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3928
Type * getScalarType() const
Definition VPlan.h:3939
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3921
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:3913
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3948
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:3906
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3759
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3764
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3750
const SCEV * getSCEV() const
Definition VPlan.h:3770
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3755
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3422
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3404
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3386
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3374
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3360
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3352
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3356
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3416
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3354
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2297
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2301
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2314
static bool classof(const VPValue *V)
Definition VPlan.h:2311
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2337
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2342
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2326
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2334
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2307
VPValue * getStartValue() const
Definition VPlan.h:2329
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2346
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2052
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2069
unsigned getOpcode() const
Definition VPlan.h:2065
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2045
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4302
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:462
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4326
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4316
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4303
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:487
Class to record and manage LLVM IR flags.
Definition VPlan.h:687
FastMathFlagsTy FMFs
Definition VPlan.h:775
ReductionFlagsTy ReductionFlags
Definition VPlan.h:777
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1030
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:868
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:848
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:834
WrapFlagsTy WrapFlags
Definition VPlan.h:769
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:827
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:992
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1056
TruncFlagsTy TruncFlags
Definition VPlan.h:770
CmpInst::Predicate getPredicate() const
Definition VPlan.h:964
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1040
uint8_t AllFlags[2]
Definition VPlan.h:778
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1000
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:873
ExactFlagsTy ExactFlags
Definition VPlan.h:772
bool hasNoSignedWrap() const
Definition VPlan.h:1019
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1044
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:839
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:844
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:853
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:822
uint8_t GEPFlagsStorage
Definition VPlan.h:773
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:858
bool isNonNeg() const
Definition VPlan.h:1002
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:982
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:987
DisjointFlagsTy DisjointFlags
Definition VPlan.h:771
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:972
bool hasNoUnsignedWrap() const
Definition VPlan.h:1008
FCmpFlagsTy FCmpFlags
Definition VPlan.h:776
NonNegFlagsTy NonNegFlags
Definition VPlan.h:774
bool isReductionInLoop() const
Definition VPlan.h:1062
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:884
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:921
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:863
uint8_t CmpPredStorage
Definition VPlan.h:768
RecurKind getRecurKind() const
Definition VPlan.h:1050
VPIRFlags(Instruction &I)
Definition VPlan.h:784
Instruction & getInstruction() const
Definition VPlan.h:1711
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1719
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1698
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1725
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1713
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1686
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1167
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1203
VPIRMetadata(Instruction &I)
Adds metatadata that can be preserved from the original instruction I.
Definition VPlan.h:1175
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1187
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1516
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1558
static bool classof(const VPUser *R)
Definition VPlan.h:1543
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1524
Type * getResultType() const
Definition VPlan.h:1564
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1547
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1222
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1448
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1468
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1389
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1328
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1319
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1335
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1309
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1322
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1262
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1313
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1257
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1254
@ VScale
Returns the value for vscale.
Definition VPlan.h:1331
@ CanonicalIVIncrementForPart
Definition VPlan.h:1238
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1265
bool hasResult() const
Definition VPlan.h:1413
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1471
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1453
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1493
unsigned getOpcode() const
Definition VPlan.h:1397
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1496
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1462
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1438
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2910
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2916
static bool classof(const VPUser *U)
Definition VPlan.h:2892
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2859
Instruction * getInsertPos() const
Definition VPlan.h:2914
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2887
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2912
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2904
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2933
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2898
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2985
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3013
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3007
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3020
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3000
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2987
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2943
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2970
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2953
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2964
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2945
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1576
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an interator range over the incoming values.
Definition VPlan.h:1605
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1600
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4293
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1625
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1585
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1610
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1614
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3479
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3461
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3472
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi nodes after merging back from ...
Definition VPlan.h:3457
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:405
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:548
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4494
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:559
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:479
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:553
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:528
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:407
const VPBasicBlock * getParent() const
Definition VPlan.h:480
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:533
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:423
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:525
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:469
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:303
LLVM_ABI_FOR_TEST VPRecipeValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:144
friend class VPValue
Definition VPlanValue.h:304
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3168
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3147
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3171
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3158
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2742
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2728
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2707
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2721
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2754
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2736
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2695
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2745
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2759
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2751
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2739
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3036
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3045
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3110
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3079
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3094
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3121
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3123
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3106
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3059
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3108
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3065
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3112
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3119
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3114
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3073
static bool classof(const VPUser *U)
Definition VPlan.h:3084
static bool classof(const VPValue *VPV)
Definition VPlan.h:3089
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3128
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4359
const VPBlockBase * getEntry() const
Definition VPlan.h:4403
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4435
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4420
VPBlockBase * getExiting()
Definition VPlan.h:4416
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4474
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4408
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4479
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4484
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4487
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4471
const VPBlockBase * getExiting() const
Definition VPlan.h:4415
VPBlockBase * getEntry()
Definition VPlan.h:4404
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4428
friend class VPlan
Definition VPlan.h:4360
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4399
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:209
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3190
bool isSingleScalar() const
Definition VPlan.h:3231
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3198
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3243
bool isPredicated() const
Definition VPlan.h:3233
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3212
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3236
unsigned getOpcode() const
Definition VPlan.h:3255
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3250
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4040
VPValue * getStepValue() const
Definition VPlan.h:4013
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4007
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3980
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4026
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3992
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4021
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4017
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3973
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4034
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:605
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:611
static bool classof(const VPValue *V)
Definition VPlan.h:659
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:672
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:615
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:675
static bool classof(const VPUser *U)
Definition VPlan.h:664
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:607
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1155
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:329
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1515
operand_range operands()
Definition VPlanValue.h:397
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:373
unsigned getNumOperands() const
Definition VPlanValue.h:367
operand_iterator op_end()
Definition VPlanValue.h:395
operand_iterator op_begin()
Definition VPlanValue.h:393
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:368
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:348
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:391
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:390
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:138
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:128
friend class VPRecipeValue
Definition VPlanValue.h:52
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:202
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2178
VPValue * getVFValue() const
Definition VPlan.h:2167
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2164
int64_t getStride() const
Definition VPlan.h:2165
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2199
VPValue * getOffset() const
Definition VPlan.h:2168
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2192
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2154
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2185
VPValue * getPointer() const
Definition VPlan.h:2166
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2236
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2238
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2245
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2223
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHeaderPHIRecipe.
Definition VPlan.h:2261
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2252
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1984
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1991
const_operand_range args() const
Definition VPlan.h:2025
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2006
operand_range args()
Definition VPlan.h:2024
Function * getCalledScalarFunction() const
Definition VPlan.h:2020
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV)
Definition VPlan.h:3859
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVPHIRecipe.
Definition VPlan.h:3876
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:3883
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3864
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1832
Instruction::CastOps getOpcode() const
Definition VPlan.h:1870
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1873
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1840
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1855
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2116
Type * getSourceElementType() const
Definition VPlan.h:2121
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2124
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2108
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2094
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2429
static bool classof(const VPValue *V)
Definition VPlan.h:2377
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2396
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2414
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2389
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2404
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2407
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2365
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2392
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2412
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2421
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2372
const VPValue * getVFValue() const
Definition VPlan.h:2399
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2382
const VPValue * getStepValue() const
Definition VPlan.h:2393
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2490
const TruncInst * getTruncInst() const
Definition VPlan.h:2506
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2484
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2494
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2476
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2450
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2505
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2459
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2521
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2501
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2514
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1884
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1915
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1955
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1964
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1901
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1970
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1936
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1967
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1958
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3507
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3542
static bool classof(const VPUser *U)
Definition VPlan.h:3536
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3561
Instruction & Ingredient
Definition VPlan.h:3498
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3525
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3517
Instruction & getIngredient() const
Definition VPlan.h:3569
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3504
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3529
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3552
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3501
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3548
void setMask(VPValue *Mask)
Definition VPlan.h:3509
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3558
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3545
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2613
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2591
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2586
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2548
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2557
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2538
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1776
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1796
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1823
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1780
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1788
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1813
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4507
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4823
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1167
friend class VPSlotTracker
Definition VPlan.h:4509
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1143
bool hasVF(ElementCount VF) const
Definition VPlan.h:4721
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4734
const DataLayout & getDataLayout() const
Definition VPlan.h:4703
LLVMContext & getContext() const
Definition VPlan.h:4699
VPBasicBlock * getEntry()
Definition VPlan.h:4599
void setName(const Twine &newName)
Definition VPlan.h:4767
bool hasScalableVF() const
Definition VPlan.h:4722
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4658
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4679
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4728
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:900
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:875
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4785
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:908
const VPBasicBlock * getEntry() const
Definition VPlan.h:4600
friend class VPlanPrinter
Definition VPlan.h:4508
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4794
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4817
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4697
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4800
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4872
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1300
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4826
bool hasUF(unsigned UF) const
Definition VPlan.h:4746
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4648
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4687
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4684
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4771
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:4860
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4797
void setVF(ElementCount VF)
Definition VPlan.h:4709
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4762
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1066
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4894
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1048
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4749
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4811
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4633
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4665
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4672
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4624
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4588
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4849
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1306
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4716
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4791
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4604
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1173
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4694
bool hasScalarVFOnly() const
Definition VPlan.h:4739
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4638
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:918
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1126
void addVF(ElementCount VF)
Definition VPlan.h:4707
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4644
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1082
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4690
void setUF(unsigned UF)
Definition VPlan.h:4754
const VPSymbolicValue & getVF() const
Definition VPlan.h:4691
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:4904
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1214
VPlan(BasicBlock *ScalarHeaderBB)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition VPlan.h:4581
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4805
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:557
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1105
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2668
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:82
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:92
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2666
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
static VPIRMetadata * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4105
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4095
static VPIRMetadata * castFailed()
Used by inherited doCastIfPossible to dyn_cast.
Definition VPlan.h:4133
static VPPhiAccessors * castFailed()
Used by inherited doCastIfPossible to dyn_cast.
Definition VPlan.h:4076
static VPPhiAccessors * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4062
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4056
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2660
Possible variants of a reduction.
Definition VPlan.h:2658
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2663
unsigned VFScaleFactor
Definition VPlan.h:2664
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:258
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2629
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2641
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2620
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:719
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:724
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:714
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:707
PHINode & getIRPhi()
Definition VPlan.h:1757
VPIRPhi(PHINode &PN)
Definition VPlan.h:1745
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1747
static bool classof(const VPUser *U)
Definition VPlan.h:1752
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1768
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:142
static bool classof(const VPUser *U)
Definition VPlan.h:1644
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1640
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1659
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1674
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1654
static bool classof(const VPValue *V)
Definition VPlan.h:1649
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1109
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1142
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1115
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1110
static bool classof(const VPValue *V)
Definition VPlan.h:1135
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1130
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:280
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3615
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3628
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3616
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3638
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3575
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3576
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3595
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3584
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3699
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition VPlan.h:3711
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3700
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3724
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3714
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3656
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3657
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3674
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3665
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3680