LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class SCEVPredicate;
61class Type;
62class VPBasicBlock;
63class VPBuilder;
64class VPDominatorTree;
65class VPRegionBlock;
66class VPlan;
67class VPLane;
69class Value;
71
72struct VPCostContext;
73
74using VPlanPtr = std::unique_ptr<VPlan>;
75
76/// \enum UncountableExitStyle
77/// Different methods of handling early exits.
78///
81 /// No side effects to worry about, so we can process any uncountable exits
82 /// in the loop and branch either to the middle block if the trip count was
83 /// reached, or an early exit block to determine which exit was taken.
85 /// All memory operations other than the load(s) required to determine whether
86 /// an uncountable exit occurred will be masked based on that condition. If an
87 /// uncountable exit is taken, then all lanes before the exiting lane will
88 /// complete, leaving just the final lane to execute in the scalar tail.
90};
91
92/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
93/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
95 friend class VPBlockUtils;
96
97 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
98
99 /// An optional name for the block.
100 std::string Name;
101
102 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
103 /// it is a topmost VPBlockBase.
104 VPRegionBlock *Parent = nullptr;
105
106 /// List of predecessor blocks.
108
109 /// List of successor blocks.
111
112 /// VPlan containing the block. Can only be set on the entry block of the
113 /// plan.
114 VPlan *Plan = nullptr;
115
116 /// Add \p Successor as the last successor to this block.
117 void appendSuccessor(VPBlockBase *Successor) {
118 assert(Successor && "Cannot add nullptr successor!");
119 Successors.push_back(Successor);
120 }
121
122 /// Add \p Predecessor as the last predecessor to this block.
123 void appendPredecessor(VPBlockBase *Predecessor) {
124 assert(Predecessor && "Cannot add nullptr predecessor!");
125 Predecessors.push_back(Predecessor);
126 }
127
128 /// Remove \p Predecessor from the predecessors of this block.
129 void removePredecessor(VPBlockBase *Predecessor) {
130 auto Pos = find(Predecessors, Predecessor);
131 assert(Pos && "Predecessor does not exist");
132 Predecessors.erase(Pos);
133 }
134
135 /// Remove \p Successor from the successors of this block.
136 void removeSuccessor(VPBlockBase *Successor) {
137 auto Pos = find(Successors, Successor);
138 assert(Pos && "Successor does not exist");
139 Successors.erase(Pos);
140 }
141
142 /// This function replaces one predecessor with another, useful when
143 /// trying to replace an old block in the CFG with a new one.
144 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
145 auto I = find(Predecessors, Old);
146 assert(I != Predecessors.end());
147 assert(Old->getParent() == New->getParent() &&
148 "replaced predecessor must have the same parent");
149 *I = New;
150 }
151
152 /// This function replaces one successor with another, useful when
153 /// trying to replace an old block in the CFG with a new one.
154 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
155 auto I = find(Successors, Old);
156 assert(I != Successors.end());
157 assert(Old->getParent() == New->getParent() &&
158 "replaced successor must have the same parent");
159 *I = New;
160 }
161
162protected:
163 VPBlockBase(const unsigned char SC, const std::string &N)
164 : SubclassID(SC), Name(N) {}
165
166public:
/// Identifies which concrete subclass of VPBlockBase an object was
/// instantiated as. The value is stored in the SubclassID field of each
/// VPBlockBase and is used for concrete type identification.
using VPBlockTy = enum {
  VPRegionBlockSC,
  VPBasicBlockSC,
  VPIRBasicBlockSC,
};
172
174
175 virtual ~VPBlockBase() = default;
176
177 const std::string &getName() const { return Name; }
178
179 void setName(const Twine &newName) { Name = newName.str(); }
180
181 /// \return an ID for the concrete type of this object.
182 /// This is used to implement the classof checks. This should not be used
183 /// for any other purpose, as the values may change as LLVM evolves.
184 unsigned getVPBlockID() const { return SubclassID; }
185
186 VPRegionBlock *getParent() { return Parent; }
187 const VPRegionBlock *getParent() const { return Parent; }
188
189 /// \return A pointer to the plan containing the current block.
190 VPlan *getPlan();
191 const VPlan *getPlan() const;
192
193 /// Sets the pointer of the plan containing the block. The block must be the
194 /// entry block into the VPlan.
195 void setPlan(VPlan *ParentPlan);
196
197 void setParent(VPRegionBlock *P) { Parent = P; }
198
199 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
200 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
201 /// VPBlockBase is a VPBasicBlock, it is returned.
202 const VPBasicBlock *getEntryBasicBlock() const;
203 VPBasicBlock *getEntryBasicBlock();
204
205 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
206 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
207 /// VPBlockBase is a VPBasicBlock, it is returned.
208 const VPBasicBlock *getExitingBasicBlock() const;
209 VPBasicBlock *getExitingBasicBlock();
210
211 const VPBlocksTy &getSuccessors() const { return Successors; }
212 VPBlocksTy &getSuccessors() { return Successors; }
213
214 /// Returns true if this block has any successors.
215 bool hasSuccessors() const { return !Successors.empty(); }
216 /// Returns true if this block has any predecessors.
217 bool hasPredecessors() const { return !Predecessors.empty(); }
218
221
222 const VPBlocksTy &getPredecessors() const { return Predecessors; }
223 VPBlocksTy &getPredecessors() { return Predecessors; }
224
225 /// \return the successor of this VPBlockBase if it has a single successor.
226 /// Otherwise return a null pointer.
228 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
229 }
230
231 /// \return the predecessor of this VPBlockBase if it has a single
232 /// predecessor. Otherwise return a null pointer.
234 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
235 }
236
237 size_t getNumSuccessors() const { return Successors.size(); }
238 size_t getNumPredecessors() const { return Predecessors.size(); }
239
240 /// An Enclosing Block of a block B is any block containing B, including B
241 /// itself. \return the closest enclosing block starting from "this", which
242 /// has successors. \return the root enclosing block if all enclosing blocks
243 /// have no successors.
244 VPBlockBase *getEnclosingBlockWithSuccessors();
245
246 /// \return the closest enclosing block starting from "this", which has
247 /// predecessors. \return the root enclosing block if all enclosing blocks
248 /// have no predecessors.
249 VPBlockBase *getEnclosingBlockWithPredecessors();
250
251 /// \return the successors either attached directly to this VPBlockBase or, if
252 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
253 /// successors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has successors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) successors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithSuccessors()->getSuccessors();
259 }
260
261 /// \return the hierarchical successor of this VPBlockBase if it has a single
262 /// hierarchical successor. Otherwise return a null pointer.
264 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
265 }
266
267 /// \return the predecessors either attached directly to this VPBlockBase or,
268 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
269 /// predecessors of its own, search recursively for the first enclosing
270 /// VPRegionBlock that has predecessors and return them. If no such
271 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
272 /// VPBlockBase reached.
274 return getEnclosingBlockWithPredecessors()->getPredecessors();
275 }
276
277 /// \return the hierarchical predecessor of this VPBlockBase if it has a
278 /// single hierarchical predecessor. Otherwise return a null pointer.
282
283 /// Set a given VPBlockBase \p Successor as the single successor of this
284 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
285 /// This VPBlockBase must have no successors.
287 assert(Successors.empty() && "Setting one successor when others exist.");
288 assert(Successor->getParent() == getParent() &&
289 "connected blocks must have the same parent");
290 appendSuccessor(Successor);
291 }
292
293 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
294 /// successors of this VPBlockBase. This VPBlockBase is not added as
295 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
296 /// successors.
297 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
298 assert(Successors.empty() && "Setting two successors when others exist.");
299 appendSuccessor(IfTrue);
300 appendSuccessor(IfFalse);
301 }
302
303 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
304 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
305 /// as successor of any VPBasicBlock in \p NewPreds.
307 assert(Predecessors.empty() && "Block predecessors already set.");
308 for (auto *Pred : NewPreds)
309 appendPredecessor(Pred);
310 }
311
312 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
313 /// This VPBlockBase must have no successors. This VPBlockBase is not added
314 /// as predecessor of any VPBasicBlock in \p NewSuccs.
316 assert(Successors.empty() && "Block successors already set.");
317 for (auto *Succ : NewSuccs)
318 appendSuccessor(Succ);
319 }
320
321 /// Remove all the predecessors of this block.
322 void clearPredecessors() { Predecessors.clear(); }
323
324 /// Remove all the successors of this block.
325 void clearSuccessors() { Successors.clear(); }
326
327 /// Swap predecessors of the block. The block must have exactly 2
328 /// predecessors.
330 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
331 std::swap(Predecessors[0], Predecessors[1]);
332 }
333
334 /// Swap successors of the block. The block must have exactly 2 successors.
335 // TODO: This should be part of introducing conditional branch recipes rather
336 // than being independent.
338 assert(Successors.size() == 2 && "must have 2 successors to swap");
339 std::swap(Successors[0], Successors[1]);
340 }
341
342 /// Returns the index for \p Pred in the blocks predecessors list.
343 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
344 assert(count(Predecessors, Pred) == 1 &&
345 "must have Pred exactly once in Predecessors");
346 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
347 }
348
349 /// Returns the index for \p Succ in the blocks successor list.
350 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
351 assert(count(Successors, Succ) == 1 &&
352 "must have Succ exactly once in Successors");
353 return std::distance(Successors.begin(), find(Successors, Succ));
354 }
355
356 /// The method which generates the output IR that correspond to this
357 /// VPBlockBase, thereby "executing" the VPlan.
358 virtual void execute(VPTransformState *State) = 0;
359
360 /// Return the cost of the block.
362
363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
364 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
365 OS << getName();
366 }
367
368 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
369 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
370 /// consecutive numbers.
371 ///
372 /// Note that the numbering is applied to the whole VPlan, so printing
373 /// individual blocks is consistent with the whole VPlan printing.
374 virtual void print(raw_ostream &O, const Twine &Indent,
375 VPSlotTracker &SlotTracker) const = 0;
376
377 /// Print plain-text dump of this VPlan to \p O.
378 void print(raw_ostream &O) const;
379
380 /// Print the successors of this block to \p O, prefixing all lines with \p
381 /// Indent.
382 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
383
384 /// Dump this VPBlockBase to dbgs().
385 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
386#endif
387
388 /// Clone the current block and its recipes without updating the operands of
389 /// the cloned recipes, including all blocks in the single-entry single-exit
390 /// region for VPRegionBlocks.
391 virtual VPBlockBase *clone() = 0;
392};
393
394/// VPRecipeBase is a base class modeling a sequence of one or more output IR
395/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
396/// and is responsible for deleting its defined values. Single-value
397 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
398/// VPRecipeBase and VPValue separately.
400 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
401 public VPDef,
402 public VPUser {
403 friend VPBasicBlock;
404 friend class VPBlockUtils;
405
406 /// Subclass identifier (for isa/dyn_cast).
407 const unsigned char SubclassID;
408
409 /// Each VPRecipe belongs to a single VPBasicBlock.
410 VPBasicBlock *Parent = nullptr;
411
412 /// The debug location for the recipe.
413 DebugLoc DL;
414
415public:
/// Identifies which concrete subclass of VPRecipeBase an object was
/// instantiated as. The value is stored in the SubclassID field of each
/// VPRecipeBase and is used for concrete type identification.
///
/// The order of the enumerators matters: the phi-like recipes, and within
/// them the recipes inheriting from VPHeaderPHIRecipe, must stay contiguous.
/// The VPFirst*/VPLast* aliases at the end name the bounds of those groups.
using VPRecipeTy = enum {
  VPBranchOnMaskSC = 0,
  VPDerivedIVSC,
  VPExpandSCEVSC,
  VPExpressionSC,
  VPIRInstructionSC,
  VPInstructionSC,
  VPInterleaveEVLSC,
  VPInterleaveSC,
  VPReductionEVLSC,
  VPReductionSC,
  VPReplicateSC,
  VPScalarIVStepsSC,
  VPVectorPointerSC,
  VPVectorEndPointerSC,
  VPWidenCallSC,
  VPWidenCanonicalIVSC,
  VPWidenCastSC,
  VPWidenGEPSC,
  VPWidenIntrinsicSC,
  VPWidenMemIntrinsicSC,
  VPWidenLoadEVLSC,
  VPWidenLoadSC,
  VPWidenStoreEVLSC,
  VPWidenStoreSC,
  VPWidenSC,
  VPBlendSC,
  VPHistogramSC,

  // Begin of the phi-like recipes; keep this group contiguous.
  VPWidenPHISC,
  VPPredInstPHISC,

  // Begin of the recipes inheriting from VPHeaderPHIRecipe; keep this
  // sub-group contiguous as well.
  VPCurrentIterationPHISC,
  VPActiveLaneMaskPHISC,
  VPFirstOrderRecurrencePHISC,
  VPWidenIntOrFpInductionSC,
  VPWidenPointerInductionSC,
  VPReductionPHISC,
  // End of the VPHeaderPHIRecipe sub-group and of the phi-like recipes.

  // Aliases naming the first and last member of each group above.
  VPFirstPHISC = VPWidenPHISC,
  VPFirstHeaderPHISC = VPCurrentIterationPHISC,
  VPLastHeaderPHISC = VPReductionPHISC,
  VPLastPHISC = VPReductionPHISC,
};
466
467 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
469 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
470
471 ~VPRecipeBase() override = default;
472
473 /// Clone the current recipe.
474 virtual VPRecipeBase *clone() = 0;
475
476 /// \return the VPBasicBlock which this VPRecipe belongs to.
477 VPBasicBlock *getParent() { return Parent; }
478 const VPBasicBlock *getParent() const { return Parent; }
479
480 /// \return the VPRegionBlock which the recipe belongs to.
481 VPRegionBlock *getRegion();
482 const VPRegionBlock *getRegion() const;
483
484 /// The method which generates the output IR instructions that correspond to
485 /// this VPRecipe, thereby "executing" the VPlan.
486 virtual void execute(VPTransformState &State) = 0;
487
488 /// Return the cost of this recipe, taking into account if the cost
489 /// computation should be skipped and the ForceTargetInstructionCost flag.
490 /// Also takes care of printing the cost for debugging.
492
493 /// Insert an unlinked recipe into a basic block immediately before
494 /// the specified recipe.
495 void insertBefore(VPRecipeBase *InsertPos);
496 /// Insert an unlinked recipe into \p BB immediately before the insertion
497 /// point \p IP;
498 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
499
500 /// Insert an unlinked Recipe into a basic block immediately after
501 /// the specified Recipe.
502 void insertAfter(VPRecipeBase *InsertPos);
503
504 /// Unlink this recipe from its current VPBasicBlock and insert it into
505 /// the VPBasicBlock that MovePos lives in, right after MovePos.
506 void moveAfter(VPRecipeBase *MovePos);
507
508 /// Unlink this recipe and insert into BB before I.
509 ///
510 /// \pre I is a valid iterator into BB.
511 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
512
513 /// This method unlinks 'this' from the containing basic block, but does not
514 /// delete it.
515 void removeFromParent();
516
517 /// This method unlinks 'this' from the containing basic block and deletes it.
518 ///
519 /// \returns an iterator pointing to the element after the erased one
521
522 /// \return an ID for the concrete type of this object.
523 unsigned getVPRecipeID() const { return SubclassID; }
524
525 /// Method to support type inquiry through isa, cast, and dyn_cast.
526 static inline bool classof(const VPDef *D) {
527 // All VPDefs are also VPRecipeBases.
528 return true;
529 }
530
531 static inline bool classof(const VPUser *U) { return true; }
532
533 /// Returns true if the recipe may have side-effects.
534 bool mayHaveSideEffects() const;
535
536 /// Return true if we can safely execute this recipe unconditionally even if
537 /// it is masked originally.
538 bool isSafeToSpeculativelyExecute() const;
539
540 /// Returns true for PHI-like recipes.
541 bool isPhi() const;
542
543 /// Returns true if the recipe may read from memory.
544 bool mayReadFromMemory() const;
545
546 /// Returns true if the recipe may write to memory.
547 bool mayWriteToMemory() const;
548
549 /// Returns true if the recipe may read from or write to memory.
550 bool mayReadOrWriteMemory() const {
552 }
553
554 /// Returns the debug location of the recipe.
555 DebugLoc getDebugLoc() const { return DL; }
556
557 /// Set the recipe's debug location to \p NewDL.
558 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
559
560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561 /// Dump the recipe to stderr (for debugging).
562 LLVM_ABI_FOR_TEST void dump() const;
563
564 /// Print the recipe, delegating to printRecipe().
565 void print(raw_ostream &O, const Twine &Indent,
567#endif
568
569protected:
570 /// Compute the cost of this recipe either using a recipe's specialized
571 /// implementation or using the legacy cost model and the underlying
572 /// instructions.
573 virtual InstructionCost computeCost(ElementCount VF,
574 VPCostContext &Ctx) const;
575
576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
577 /// Each concrete VPRecipe prints itself, without printing common information,
578 /// like debug info or metadata.
579 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
580 VPSlotTracker &SlotTracker) const = 0;
581#endif
582};
583
// Helper macro that expands to the common classof overloads of a recipe class
// whose recipe ID is \p VPRecipeID. The overloads accept a VPRecipeBase, a
// VPValue (matched through its defining recipe, if any), a VPUser (matched
// only if it is a VPRecipeBase) and a VPSingleDefRecipe.
#define VP_CLASSOF_IMPL(VPRecipeID) \
  static inline bool classof(const VPRecipeBase *Recipe) { \
    return Recipe->getVPRecipeID() == VPRecipeID; \
  } \
  static inline bool classof(const VPValue *V) { \
    auto *DefRecipe = V->getDefiningRecipe(); \
    return DefRecipe && DefRecipe->getVPRecipeID() == VPRecipeID; \
  } \
  static inline bool classof(const VPUser *U) { \
    auto *UserRecipe = dyn_cast<VPRecipeBase>(U); \
    return UserRecipe && UserRecipe->getVPRecipeID() == VPRecipeID; \
  } \
  static inline bool classof(const VPSingleDefRecipe *SingleDef) { \
    return SingleDef->getVPRecipeID() == VPRecipeID; \
  }
600
601/// Compute the scalar result type for an IR \p Opcode given \p Operands.
602LLVM_ABI Type *computeScalarTypeForInstruction(unsigned Opcode,
603 ArrayRef<VPValue *> Operands);
604
605/// VPSingleDefRecipe is a base class for recipes that model a sequence of one
606/// or more output IR that define a single result VPValue. Note that
607/// VPSingleDefRecipe must inherit from VPRecipeBase before VPSingleDefValue.
609 public VPSingleDefValue {
610public:
611 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
613 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this) {}
614
615 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
617 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV) {}
618
619 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
620 Type *ResultTy, Value *UV = nullptr,
622 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV, ResultTy) {}
623
624 static inline bool classof(const VPRecipeBase *R) {
625 switch (R->getVPRecipeID()) {
626 case VPRecipeBase::VPDerivedIVSC:
627 case VPRecipeBase::VPExpandSCEVSC:
628 case VPRecipeBase::VPExpressionSC:
629 case VPRecipeBase::VPInstructionSC:
630 case VPRecipeBase::VPReductionEVLSC:
631 case VPRecipeBase::VPReductionSC:
632 case VPRecipeBase::VPReplicateSC:
633 case VPRecipeBase::VPScalarIVStepsSC:
634 case VPRecipeBase::VPVectorPointerSC:
635 case VPRecipeBase::VPVectorEndPointerSC:
636 case VPRecipeBase::VPWidenCallSC:
637 case VPRecipeBase::VPWidenCanonicalIVSC:
638 case VPRecipeBase::VPWidenCastSC:
639 case VPRecipeBase::VPWidenGEPSC:
640 case VPRecipeBase::VPWidenIntrinsicSC:
641 case VPRecipeBase::VPWidenMemIntrinsicSC:
642 case VPRecipeBase::VPWidenSC:
643 case VPRecipeBase::VPBlendSC:
644 case VPRecipeBase::VPPredInstPHISC:
645 case VPRecipeBase::VPCurrentIterationPHISC:
646 case VPRecipeBase::VPActiveLaneMaskPHISC:
647 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
648 case VPRecipeBase::VPWidenPHISC:
649 case VPRecipeBase::VPWidenIntOrFpInductionSC:
650 case VPRecipeBase::VPWidenPointerInductionSC:
651 case VPRecipeBase::VPReductionPHISC:
652 case VPRecipeBase::VPWidenLoadEVLSC:
653 case VPRecipeBase::VPWidenLoadSC:
654 return true;
655 case VPRecipeBase::VPBranchOnMaskSC:
656 case VPRecipeBase::VPInterleaveEVLSC:
657 case VPRecipeBase::VPInterleaveSC:
658 case VPRecipeBase::VPIRInstructionSC:
659 case VPRecipeBase::VPWidenStoreEVLSC:
660 case VPRecipeBase::VPWidenStoreSC:
661 case VPRecipeBase::VPHistogramSC:
662 return false;
663 }
664 llvm_unreachable("Unhandled VPRecipeID");
665 }
666
667 static inline bool classof(const VPValue *V) {
668 auto *R = V->getDefiningRecipe();
669 return R && classof(R);
670 }
671
672 static inline bool classof(const VPUser *U) {
673 auto *R = dyn_cast<VPRecipeBase>(U);
674 return R && classof(R);
675 }
676
677 VPSingleDefRecipe *clone() override = 0;
678
679 /// Returns the underlying instruction.
686
687#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
688 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
690#endif
691};
692
693/// Class to record and manage LLVM IR flags.
/// Kind of operation the flags belong to. It selects which member of the
/// flags union below is meaningful.
enum class OperationType : unsigned char {
  Cmp,              ///< Compare other than FCmp; carries a predicate only.
  FCmp,             ///< Floating-point compare; predicate plus FMFs.
  OverflowingBinOp, ///< Carries NUW/NSW wrap flags.
  Trunc,            ///< Carries NUW/NSW flags for truncs.
  DisjointOp,       ///< Carries the disjoint flag.
  PossiblyExactOp,  ///< Carries the exact flag.
  GEPOp,            ///< Carries GEP no-wrap flags.
  FPMathOp,         ///< Carries fast-math flags.
  NonNegOp,         ///< Carries the non-negative flag.
  ReductionOp,      ///< Carries reduction-specific flags.
  Other             ///< No flags tracked.
};
709
710public:
711 struct WrapFlagsTy {
712 char HasNUW : 1;
713 char HasNSW : 1;
714
716 };
717
719 char HasNUW : 1;
720 char HasNSW : 1;
721
723 };
724
729
731 char NonNeg : 1;
732 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
733 };
734
735private:
736 struct ExactFlagsTy {
737 char IsExact : 1;
738 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
739 };
740 struct FastMathFlagsTy {
741 char AllowReassoc : 1;
742 char NoNaNs : 1;
743 char NoInfs : 1;
744 char NoSignedZeros : 1;
745 char AllowReciprocal : 1;
746 char AllowContract : 1;
747 char ApproxFunc : 1;
748
749 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
750 };
751 /// Holds both the predicate and fast-math flags for floating-point
752 /// comparisons.
753 struct FCmpFlagsTy {
754 uint8_t CmpPredStorage;
755 FastMathFlagsTy FMFs;
756 };
757 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
758 struct ReductionFlagsTy {
759 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
760 // additional kinds.
761 unsigned char Kind : 6;
762 // TODO: Derive order/in-loop from plan and remove here.
763 unsigned char IsOrdered : 1;
764 unsigned char IsInLoop : 1;
765 FastMathFlagsTy FMFs;
766
767 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
768 FastMathFlags FMFs)
769 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
770 IsInLoop(IsInLoop), FMFs(FMFs) {}
771 };
772
773 OperationType OpType;
774
775 union {
780 ExactFlagsTy ExactFlags;
783 FastMathFlagsTy FMFs;
784 FCmpFlagsTy FCmpFlags;
785 ReductionFlagsTy ReductionFlags;
787 };
788
789public:
790 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
791
793 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
794 OpType = OperationType::FCmp;
796 FCmp->getPredicate());
797 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
798 FCmpFlags.FMFs = FCmp->getFastMathFlags();
799 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
800 OpType = OperationType::Cmp;
802 Op->getPredicate());
803 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
804 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
805 OpType = OperationType::DisjointOp;
806 DisjointFlags.IsDisjoint = Op->isDisjoint();
807 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
808 OpType = OperationType::OverflowingBinOp;
809 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
810 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
811 OpType = OperationType::Trunc;
812 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
813 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
814 OpType = OperationType::PossiblyExactOp;
815 ExactFlags.IsExact = Op->isExact();
816 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
817 OpType = OperationType::GEPOp;
818 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
819 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
820 "wrap flags truncated");
821 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
822 OpType = OperationType::NonNegOp;
823 NonNegFlags.NonNeg = PNNI->hasNonNeg();
824 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
825 OpType = OperationType::FPMathOp;
826 FMFs = Op->getFastMathFlags();
827 }
828 }
829
830 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
832 assert(getPredicate() == Pred && "predicate truncated");
833 }
834
836 : OpType(OperationType::FCmp), AllFlags() {
838 assert(getPredicate() == Pred && "predicate truncated");
839 FCmpFlags.FMFs = FMFs;
840 }
841
843 : OpType(OperationType::OverflowingBinOp), AllFlags() {
844 this->WrapFlags = WrapFlags;
845 }
846
848 : OpType(OperationType::Trunc), AllFlags() {
849 this->TruncFlags = TruncFlags;
850 }
851
852 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
853 this->FMFs = FMFs;
854 }
855
857 : OpType(OperationType::DisjointOp), AllFlags() {
858 this->DisjointFlags = DisjointFlags;
859 }
860
862 : OpType(OperationType::NonNegOp), AllFlags() {
863 this->NonNegFlags = NonNegFlags;
864 }
865
866 VPIRFlags(ExactFlagsTy ExactFlags)
867 : OpType(OperationType::PossiblyExactOp), AllFlags() {
868 this->ExactFlags = ExactFlags;
869 }
870
872 : OpType(OperationType::GEPOp), AllFlags() {
873 GEPFlagsStorage = GEPFlags.getRaw();
874 }
875
876 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
877 : OpType(OperationType::ReductionOp), AllFlags() {
878 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
879 }
880
882 OpType = Other.OpType;
883 AllFlags[0] = Other.AllFlags[0];
884 AllFlags[1] = Other.AllFlags[1];
885 }
886
887 /// Only keep flags also present in \p Other. \p Other must have the same
888 /// OpType as the current object.
889 void intersectFlags(const VPIRFlags &Other);
890
891 /// Drop all poison-generating flags.
893 // NOTE: This needs to be kept in-sync with
894 // Instruction::dropPoisonGeneratingFlags.
895 switch (OpType) {
896 case OperationType::OverflowingBinOp:
897 WrapFlags.HasNUW = false;
898 WrapFlags.HasNSW = false;
899 break;
900 case OperationType::Trunc:
901 TruncFlags.HasNUW = false;
902 TruncFlags.HasNSW = false;
903 break;
904 case OperationType::DisjointOp:
905 DisjointFlags.IsDisjoint = false;
906 break;
907 case OperationType::PossiblyExactOp:
908 ExactFlags.IsExact = false;
909 break;
910 case OperationType::GEPOp:
911 GEPFlagsStorage = 0;
912 break;
913 case OperationType::FPMathOp:
914 case OperationType::FCmp:
915 case OperationType::ReductionOp:
916 getFMFsRef().NoNaNs = false;
917 getFMFsRef().NoInfs = false;
918 break;
919 case OperationType::NonNegOp:
920 NonNegFlags.NonNeg = false;
921 break;
922 case OperationType::Cmp:
923 case OperationType::Other:
924 break;
925 }
926 }
927
928 /// Apply the IR flags to \p I. The set of flags transferred is selected by
/// the operation type (OpType) of the recipe.
929 void applyFlags(Instruction &I) const {
930 switch (OpType) {
931 case OperationType::OverflowingBinOp:
932 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
933 I.setHasNoSignedWrap(WrapFlags.HasNSW);
934 break;
935 case OperationType::Trunc:
936 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
937 I.setHasNoSignedWrap(TruncFlags.HasNSW);
938 break;
939 case OperationType::DisjointOp:
// The disjoint flag is set through PossiblyDisjointInst, so \p I is cast
// to that type first.
940 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
941 break;
942 case OperationType::PossiblyExactOp:
943 I.setIsExact(ExactFlags.IsExact);
944 break;
945 case OperationType::GEPOp:
// NOTE(review): the argument of setNoWrapFlags is not visible in this
// listing; presumably it is derived from GEPFlagsStorage -- confirm.
946 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
948 break;
949 case OperationType::FPMathOp:
950 case OperationType::FCmp: {
// Transfer every individual fast-math flag to the instruction.
951 const FastMathFlagsTy &F = getFMFsRef();
952 I.setHasAllowReassoc(F.AllowReassoc);
953 I.setHasNoNaNs(F.NoNaNs);
954 I.setHasNoInfs(F.NoInfs);
955 I.setHasNoSignedZeros(F.NoSignedZeros);
956 I.setHasAllowReciprocal(F.AllowReciprocal);
957 I.setHasAllowContract(F.AllowContract);
958 I.setHasApproxFunc(F.ApproxFunc);
959 break;
960 }
961 case OperationType::NonNegOp:
962 I.setNonNeg(NonNegFlags.NonNeg);
963 break;
964 case OperationType::ReductionOp:
// Calling applyFlags on a reduction recipe is treated as a programming
// error.
965 llvm_unreachable("reduction ops should not use applyFlags");
966 case OperationType::Cmp:
967 case OperationType::Other:
// Nothing is applied to \p I for these operation types.
968 break;
969 }
970 }
971
973 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
974 "recipe doesn't have a compare predicate");
975 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
978 }
979
981 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
982 "recipe doesn't have a compare predicate");
983 if (OpType == OperationType::FCmp)
985 else
987 assert(getPredicate() == Pred && "predicate truncated");
988 }
989
993
994 /// Returns true if the recipe has a comparison predicate.
995 bool hasPredicate() const {
996 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
997 }
998
999 /// Returns true if the recipe has fast-math flags.
1000 bool hasFastMathFlags() const {
1001 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
1002 OpType == OperationType::ReductionOp;
1003 }
1004
1006
1007 /// Returns true if the recipe has non-negative flag.
1008 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1009
1010 bool isNonNeg() const {
1011 assert(OpType == OperationType::NonNegOp &&
1012 "recipe doesn't have a NNEG flag");
1013 return NonNegFlags.NonNeg;
1014 }
1015
1016 bool hasNoUnsignedWrap() const {
1017 switch (OpType) {
1018 case OperationType::OverflowingBinOp:
1019 return WrapFlags.HasNUW;
1020 case OperationType::Trunc:
1021 return TruncFlags.HasNUW;
1022 default:
1023 llvm_unreachable("recipe doesn't have a NUW flag");
1024 }
1025 }
1026
1027 bool hasNoSignedWrap() const {
1028 switch (OpType) {
1029 case OperationType::OverflowingBinOp:
1030 return WrapFlags.HasNSW;
1031 case OperationType::Trunc:
1032 return TruncFlags.HasNSW;
1033 default:
1034 llvm_unreachable("recipe doesn't have a NSW flag");
1035 }
1036 }
1037
1038 bool hasNoWrapFlags() const {
1039 switch (OpType) {
1040 case OperationType::OverflowingBinOp:
1041 case OperationType::Trunc:
1042 return true;
1043 default:
1044 return false;
1045 }
1046 }
1047
1049 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1050 }
1051
1052 bool isDisjoint() const {
1053 assert(OpType == OperationType::DisjointOp &&
1054 "recipe cannot have a disjoing flag");
1055 return DisjointFlags.IsDisjoint;
1056 }
1057
1059 assert(OpType == OperationType::ReductionOp &&
1060 "recipe doesn't have reduction flags");
1061 return static_cast<RecurKind>(ReductionFlags.Kind);
1062 }
1063
1064 bool isReductionOrdered() const {
1065 assert(OpType == OperationType::ReductionOp &&
1066 "recipe doesn't have reduction flags");
1067 return ReductionFlags.IsOrdered;
1068 }
1069
1070 bool isReductionInLoop() const {
1071 assert(OpType == OperationType::ReductionOp &&
1072 "recipe doesn't have reduction flags");
1073 return ReductionFlags.IsInLoop;
1074 }
1075
1076private:
1077 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1078 FastMathFlagsTy &getFMFsRef() {
1079 if (OpType == OperationType::FCmp)
1080 return FCmpFlags.FMFs;
1081 if (OpType == OperationType::ReductionOp)
1082 return ReductionFlags.FMFs;
1083 return FMFs;
1084 }
1085 const FastMathFlagsTy &getFMFsRef() const {
1086 if (OpType == OperationType::FCmp)
1087 return FCmpFlags.FMFs;
1088 if (OpType == OperationType::ReductionOp)
1089 return ReductionFlags.FMFs;
1090 return FMFs;
1091 }
1092
1093public:
1094 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1095 /// otherwise. Opcodes not supporting default flags include compares and
1096 /// ComputeReductionResult.
1097 static VPIRFlags getDefaultFlags(unsigned Opcode);
1098
1099#if !defined(NDEBUG)
1100 /// Returns true if the set flags are valid for \p Opcode.
1101 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1102
1103 /// Returns true if \p Opcode has its required flags set.
1104 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1105#endif
1106
1107#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1108 void printFlags(raw_ostream &O) const;
1109#endif
1110};
1112
1113static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1114
1115/// A pure-virtual common base class for recipes defining a single VPValue and
1116/// using IR flags.
1118 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1119 const VPIRFlags &Flags,
1121 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1122
1123 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1124 Type *ResultTy, const VPIRFlags &Flags,
1126 : VPSingleDefRecipe(SC, Operands, ResultTy, /*UV=*/nullptr, DL),
1127 VPIRFlags(Flags) {}
1128
1129 static inline bool classof(const VPRecipeBase *R) {
1130 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1131 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1132 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1133 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1134 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1135 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1136 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1137 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC ||
1138 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1139 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1140 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1141 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1142 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC ||
1143 R->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1144 }
1145
1146 static inline bool classof(const VPUser *U) {
1147 auto *R = dyn_cast<VPRecipeBase>(U);
1148 return R && classof(R);
1149 }
1150
1151 static inline bool classof(const VPValue *V) {
1152 auto *R = V->getDefiningRecipe();
1153 return R && classof(R);
1154 }
1155
1157
1158 static inline bool classof(const VPSingleDefRecipe *R) {
1159 return classof(static_cast<const VPRecipeBase *>(R));
1160 }
1161
1162 void execute(VPTransformState &State) override = 0;
1163
1164 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1166 VPCostContext &Ctx) const;
1167};
1168
1169/// Helper to manage IR metadata for recipes. It filters out metadata that
1170/// cannot be propagated.
1173
1174public:
1175 VPIRMetadata() = default;
1176
1177 /// Adds metadata that can be preserved from the original instruction
1178 /// \p I.
1180
1181 /// Copy constructor for cloning.
1183
1185
1186 /// Add all metadata to \p I.
1187 void applyMetadata(Instruction &I) const;
1188
1189 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1190 /// already exists, it will be replaced. Otherwise, it will be added.
1191 void setMetadata(unsigned Kind, MDNode *Node) {
1192 auto It =
1193 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1194 return P.first == Kind;
1195 });
1196 if (It != Metadata.end())
1197 It->second = Node;
1198 else
1199 Metadata.emplace_back(Kind, Node);
1200 }
1201
1202 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1203 /// nodes that are common to both.
1204 void intersect(const VPIRMetadata &MD);
1205
1206 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1207 MDNode *getMetadata(unsigned Kind) const {
1208 auto It =
1209 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1210 return It != Metadata.end() ? It->second : nullptr;
1211 }
1212
1213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1214 /// Print metadata with node IDs.
1215 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1216#endif
1217};
1218
1219/// This is a concrete Recipe that models a single VPlan-level instruction.
1220/// While as any Recipe it may generate a sequence of IR instructions when
1221/// executed, these instructions would always form a single-def expression as
1222/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1223/// opcodes can take an optional mask. Masks may be assigned during
1224/// predication.
1226 public VPIRMetadata {
1227public:
1228 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1229 enum {
1231 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1232 // values of a first-order recurrence.
1234 // Creates a mask where each lane is active (true) whilst the current
1235 // counter (first operand + index) is less than the second operand. i.e.
1236 // mask[i] = icmp ult (op0 + i), op1
1237 // The size of the mask returned is VF * Multiplier (UF, third op).
1240 // Represents the incoming loop-invariant alias-mask. All memory accesses
1241 // in the loop must stay within the active lanes.
1244 // Increment the canonical IV separately for each unrolled part.
1246 // Abstract instruction that compares two values and branches. This is
1247 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1250 // Branch with 2 boolean condition operands and 3 successors. If condition
1251 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1252 // successor 1; otherwise branches to successor 2. Expanded after region
1253 // dissolution into: (1) an OR of the two conditions branching to
1254 // middle.split or successor 2, and (2) middle.split branching to successor
1255 // 0 or successor 1 based on condition 0.
1258 /// Given operands of (the same) struct type, creates a struct of fixed-
1259 /// width vectors each containing a struct field of all operands. The
1260 /// number of operands matches the element count of every vector.
1262 /// Creates a fixed-width vector containing all operands. The number of
1263 /// operands matches the vector element count.
1265 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1266 /// abstract VPInstruction whose single defined VPValue represents VF
1267 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1268 /// VPInstructions.
1270 /// Reduce the operands to the final reduction result using the operation
1271 /// specified via the operation's VPIRFlags.
1273 // Extracts the last part of its operand. Removed during unrolling.
1275 // Extracts the last lane of its vector operand, per part.
1277 // Extracts the second-to-last lane from its operand or the second-to-last
1278 // part if it is scalar. In the latter case, the recipe will be removed
1279 // during unrolling.
1281 LogicalAnd, // Non-poison propagating logical And.
1282 LogicalOr, // Non-poison propagating logical Or.
1283 NumActiveLanes, // Counts the number of active lanes in a mask.
1284 // Add an offset in bytes (second operand) to a base pointer (first
1285 // operand). Only generates scalar values (either for the first lane only or
1286 // for all lanes, depending on its uses).
1288 // Add a vector offset in bytes (second operand) to a scalar base pointer
1289 // (first operand).
1291 // Returns a scalar boolean value, which is true if any lane of its
1292 // (boolean) vector operands is true. It produces the reduced value across
1293 // all unrolled iterations. Unrolling will add all copies of its original
1294 // operand as additional operands. AnyOf is poison-safe as all operands
1295 // will be frozen.
1297 // Calculates the first active lane index of the vector predicate operands.
1298 // It produces the lane index across all unrolled iterations. Unrolling will
1299 // add all copies of its original operand as additional operands.
1300 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1301 // result even with operands that are all zeroes.
1303 // Calculates the last active lane index of the vector predicate operands.
1304 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1305 // tail-folding to extract the correct live-out value from the last active
1306 // iteration. It produces the lane index across all unrolled iterations.
1307 // Unrolling will add all copies of its original operand as additional
1308 // operands.
1310 // Returns a reversed vector for the operand.
1312 /// Start vector for reductions with 3 operands: the original start value,
1313 /// the identity value for the reduction and an integer indicating the
1314 /// scaling factor.
1316 /// Extracts a single lane (first operand) from a set of vector operands.
1317 /// The lane specifies an index into a vector formed by combining all vector
1318 /// operands (all operands after the first one).
1320 /// Explicit user for the resume phi of the canonical induction in the main
1321 /// VPlan, used by the epilogue vector loop.
1323 /// Extracts the last active lane from a set of vectors. The first operand
1324 /// is the default value if no lanes in the masks are active. Conceptually,
1325 /// this concatenates all data vectors (odd operands), concatenates all
1326 /// masks (even operands -- ignoring the default value), and returns the
1327 /// last active value from the combined data vector using the combined mask.
1329 /// Compute the exiting value of a wide induction after vectorization, that
1330 /// is the value of the last lane of the induction increment (i.e. its
1331 /// backedge value). Has the wide induction recipe as operand.
1334
1335 // The opcodes below are used for VPInstructionWithType.
1336 // NOTE: VPInstructionWithType classes are also used for:
1337 // 1. All CastInst variants - see createVPInstructionsForVPBB, and other
1338 // cases where createScalarCast, createScalarZExtOrTrunc and
1339 // createScalarSExtOrTrunc are invoked.
1340 // 2. Scalar load instructions - see createVPInstructionsForVPBB.
1341
1342 /// Scale the first operand (vector step) by the second operand
1343 /// (scalar-step). Casts both operands to the result type if needed.
1345 // Creates a step vector starting from 0 to VF with a step of 1.
1347 /// Returns the value for vscale.
1349
1351 };
1352
1353 /// Returns true if this VPInstruction generates scalar values for all lanes.
1354 /// Most VPInstructions generate a single value per part, either vector or
1355 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1356 /// values per all lanes, stemming from an original ingredient. This method
1357 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1358 /// underlying ingredient.
1359 bool doesGeneratePerAllLanes() const;
1360
1361 /// Return the number of operands determined by the opcode of the
1362 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1363 /// cannot be determined directly by the opcode.
1364 unsigned getNumOperandsForOpcode() const;
1365
1366private:
1367 typedef unsigned char OpcodeTy;
1368 OpcodeTy Opcode;
1369
1370 /// An optional name that can be used for the generated IR instruction.
1371 std::string Name;
1372
1373 /// Returns true if we can generate a scalar for the first lane only if
1374 /// needed.
1375 bool canGenerateScalarForFirstLane() const;
1376
1377 /// Utility methods serving execute(): generates a single vector instance of
1378 /// the modeled instruction. \returns the generated value. In some cases an
1379 /// existing value is returned rather than a generated one.
1380 Value *generate(VPTransformState &State);
1381
1382 /// Returns true if the VPInstruction does not need masking.
1383 bool alwaysUnmasked() const {
1384 if (Opcode == VPInstruction::MaskedCond)
1385 return false;
1386
1387 // For now only VPInstructions with underlying values use masks.
1388 // TODO: provide masks to VPInstructions w/o underlying values.
1389 if (!getUnderlyingValue())
1390 return true;
1391
1392 return Instruction::isCast(Opcode) || Opcode == Instruction::PHI ||
1393 Opcode == Instruction::GetElementPtr;
1394 }
1395
1396public:
1397 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1398 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1399 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
1400 Type *ResultTy = nullptr);
1401
1402 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1403
1404 VPInstruction *clone() override {
1406 }
1407
1409 Type *ResultTy = nullptr) {
1410 auto *New = new VPInstruction(Opcode, NewOperands, *this, *this,
1411 getDebugLoc(), Name, ResultTy);
1412 if (getUnderlyingValue())
1413 New->setUnderlyingValue(getUnderlyingInstr());
1414 return New;
1415 }
1416
1417 unsigned getOpcode() const { return Opcode; }
1418
1419 /// Add \p Op as operand of this VPInstruction. Only supported for AnyOf,
1420 /// ComputeReductionResult, BuildVector, BuildStructVector, ExtractLane,
1421 /// ExtractLastActive, FirstActiveLane, LastActiveLane.
1422 void addOperand(VPValue *Op);
1423
1424 /// Generate the instruction.
1425 /// TODO: We currently execute only per-part unless a specific instance is
1426 /// provided.
1427 void execute(VPTransformState &State) override;
1428
1429 /// Return the cost of this VPInstruction.
1430 InstructionCost computeCost(ElementCount VF,
1431 VPCostContext &Ctx) const override;
1432
1433#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1434 /// Print the VPInstruction to dbgs() (for debugging).
1435 LLVM_DUMP_METHOD void dump() const;
1436#endif
1437
/// Returns false for the opcodes enumerated in the switch below and true for
/// every other opcode.
1438 bool hasResult() const {
1439 // CallInst may or may not have a result, depending on the called function.
1440 // Conservatively report that calls have a result for now.
1441 switch (getOpcode()) {
1442 case Instruction::Ret:
1443 case Instruction::UncondBr:
1444 case Instruction::CondBr:
1445 case Instruction::Store:
1446 case Instruction::Switch:
1447 case Instruction::IndirectBr:
1448 case Instruction::Resume:
1449 case Instruction::CatchRet:
1450 case Instruction::Unreachable:
1451 case Instruction::Fence:
// NOTE(review): an IR atomicrmw yields the previous memory value; confirm
// that treating it as result-less here is intended.
1452 case Instruction::AtomicRMW:
1456 return false;
1457 default:
1458 return true;
1459 }
1460 }
1461
1462 /// Returns true if the VPInstruction has a mask operand.
1463 bool isMasked() const {
1464 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1465 // VPInstructions without a fixed number of operands cannot be masked.
1466 if (NumOpsForOpcode == -1u)
1467 return false;
1468 return NumOpsForOpcode + 1 == getNumOperands();
1469 }
1470
1471 /// Returns the number of operands, excluding the mask if the VPInstruction is
1472 /// masked.
1473 unsigned getNumOperandsWithoutMask() const {
1474 return getNumOperands() - isMasked();
1475 }
1476
1477 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1478 void addMask(VPValue *Mask) {
1479 assert(!isMasked() && "recipe is already masked");
1480 if (alwaysUnmasked())
1481 return;
1482 assert(Mask->getScalarType()->isIntegerTy(1) &&
1483 "Mask must be an i1 (vector)");
1484 VPUser::addOperand(Mask);
1485 }
1486
1487 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1488 /// VPInstructions.
1489 VPValue *getMask() const {
1490 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1491 }
1492
1493 /// Returns an iterator range over the operands excluding the mask operand
1494 /// if present.
1501
1502 /// Returns true if the underlying opcode may read from or write to memory.
1503 bool opcodeMayReadOrWriteFromMemory() const;
1504
1505 /// Returns true if the recipe only uses the first lane of operand \p Op.
1506 bool usesFirstLaneOnly(const VPValue *Op) const override;
1507
1508 /// Returns true if the recipe only uses the first part of operand \p Op.
1509 bool usesFirstPartOnly(const VPValue *Op) const override;
1510
1511 /// Returns true if this VPInstruction produces a scalar value from a vector,
1512 /// e.g. by performing a reduction or extracting a lane.
1513 bool isVectorToScalar() const;
1514
1515 /// Returns true if this VPInstruction's operands are single scalars and the
1516 /// result is also a single scalar.
1517 bool isSingleScalar() const;
1518
1519 /// Returns the symbolic name assigned to the VPInstruction.
1520 StringRef getName() const { return Name; }
1521
1522 /// Set the symbolic name for the VPInstruction.
1523 void setName(StringRef NewName) { Name = NewName.str(); }
1524
1525protected:
1526#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1527 /// Print the VPInstruction to \p O.
1528 void printRecipe(raw_ostream &O, const Twine &Indent,
1529 VPSlotTracker &SlotTracker) const override;
1530#endif
1531};
1532
1533/// A specialization of VPInstruction augmenting it with a dedicated result
1534/// type, to be used when the opcode and operands of the VPInstruction don't
1535/// directly determine the result type. Note that there is no separate recipe ID
1536/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1537/// distinguished purely by the opcode.
1538/// TODO: Merge with VPInstruction, now that VPRecipeValue provides the type.
1540public:
1542 Type *ResultTy, const VPIRFlags &Flags = {},
1543 const VPIRMetadata &Metadata = {},
1545 const Twine &Name = "", Value *UV = nullptr)
1546 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {
1548 }
1549
1550 static inline bool classof(const VPRecipeBase *R) {
1551 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1552 // type information.
1553 auto *VPI = dyn_cast<VPInstruction>(R);
1554 if (!VPI)
1555 return false;
1556 unsigned Opc = VPI->getOpcode();
1558 return true;
1559 switch (Opc) {
1563 case Instruction::Load:
1564 return true;
1565 default:
1566 return false;
1567 }
1568 }
1569
1570 static inline bool classof(const VPUser *R) {
1572 }
1573
1574 VPInstruction *clone() override {
1575 auto *New =
1577 *this, *this, getDebugLoc(), getName());
1578 New->setUnderlyingValue(getUnderlyingValue());
1579 return New;
1580 }
1581
1582 void execute(VPTransformState &State) override;
1583
1584 /// Return the cost of this VPInstruction.
1586 VPCostContext &Ctx) const override;
1587
1588 Type *getResultType() const { return getScalarType(); }
1589
1590 /// Cast recipes always use scalars of their operand.
1591 bool usesScalars(const VPValue *Op) const override {
1593 return true;
1595 }
1596
1597protected:
1598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1599 /// Print the recipe.
1600 void printRecipe(raw_ostream &O, const Twine &Indent,
1601 VPSlotTracker &SlotTracker) const override;
1602#endif
1603};
1604
1605/// Helper type to provide functions to access incoming values and blocks for
1606/// phi-like recipes.
1608protected:
1609 /// Return a VPRecipeBase* to the current object.
1610 virtual const VPRecipeBase *getAsRecipe() const = 0;
1611
1612public:
1613 virtual ~VPPhiAccessors() = default;
1614
1615 /// Returns the incoming VPValue with index \p Idx.
1616 VPValue *getIncomingValue(unsigned Idx) const {
1617 return getAsRecipe()->getOperand(Idx);
1618 }
1619
1620 /// Returns the incoming block with index \p Idx.
1621 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1622
1623 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1624 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1625
1626 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1627 /// block.
1628 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1629
1630 /// Returns the number of incoming values, also number of incoming blocks.
1631 virtual unsigned getNumIncoming() const {
1632 return getAsRecipe()->getNumOperands();
1633 }
1634
1635 /// Returns an iterator range over the incoming values.
1637 return make_range(getAsRecipe()->op_begin(),
1638 getAsRecipe()->op_begin() + getNumIncoming());
1639 }
1640
1642 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1643
1644 /// Returns an iterator range over the incoming blocks.
1646 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1647 return getIncomingBlock(Idx);
1648 };
1649 return map_range(index_range(0, getNumIncoming()), GetBlock);
1650 }
1651
1652 /// Returns an iterator range over pairs of incoming values and corresponding
1653 /// incoming blocks.
1659
1660 /// Removes the incoming value for \p IncomingBlock, which must be a
1661 /// predecessor.
1662 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1663
1664 /// Append \p IncomingV as an incoming value to the phi-like recipe.
1665 void addIncoming(VPValue *IncomingV) {
1666 auto *R = const_cast<VPRecipeBase *>(getAsRecipe());
1667 assert((R->getNumOperands() == 0 ||
1668 IncomingV->getScalarType() == R->getOperand(0)->getScalarType()) &&
1669 "all incoming values must have the same type");
1670 R->addOperand(IncomingV);
1671 }
1672
1673#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1674 /// Print the recipe.
1676#endif
1677};
1678
1681 const Twine &Name = "", Type *ResultTy = nullptr)
1682 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name,
1683 ResultTy) {}
1684
1685 static inline bool classof(const VPUser *U) {
1686 auto *VPI = dyn_cast<VPInstruction>(U);
1687 return VPI && VPI->getOpcode() == Instruction::PHI;
1688 }
1689
1690 static inline bool classof(const VPValue *V) {
1691 auto *VPI = dyn_cast<VPInstruction>(V);
1692 return VPI && VPI->getOpcode() == Instruction::PHI;
1693 }
1694
1695 static inline bool classof(const VPSingleDefRecipe *SDR) {
1696 auto *VPI = dyn_cast<VPInstruction>(SDR);
1697 return VPI && VPI->getOpcode() == Instruction::PHI;
1698 }
1699
1700 VPPhi *clone() override {
1701 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1702 PhiR->setUnderlyingValue(getUnderlyingValue());
1703 return PhiR;
1704 }
1705
1706 void execute(VPTransformState &State) override;
1707
1708protected:
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1710 /// Print the recipe.
1711 void printRecipe(raw_ostream &O, const Twine &Indent,
1712 VPSlotTracker &SlotTracker) const override;
1713#endif
1714
1715 const VPRecipeBase *getAsRecipe() const override { return this; }
1716};
1717
1718 /// A recipe to wrap an original IR instruction not to be modified during
1719 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1720 /// Except PHIs, VPIRInstructions cannot have any operands.
1722 Instruction &I;
1723
1724protected:
1725 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1726 /// subclasses may need to be created, e.g. VPIRPhi.
1728 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1729
1730public:
1731 ~VPIRInstruction() override = default;
1732
1733 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1734 /// VPIRInstruction.
1736
1737 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1738
1740 auto *R = create(I);
1741 for (auto *Op : operands())
1742 R->addOperand(Op);
1743 return R;
1744 }
1745
1746 void execute(VPTransformState &State) override;
1747
1748 /// Return the cost of this VPIRInstruction.
1750 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1751
1752 Instruction &getInstruction() const { return I; }
1753
1754 bool usesScalars(const VPValue *Op) const override {
1756 "Op must be an operand of the recipe");
1757 return true;
1758 }
1759
1760 bool usesFirstPartOnly(const VPValue *Op) const override {
1762 "Op must be an operand of the recipe");
1763 return true;
1764 }
1765
1766 bool usesFirstLaneOnly(const VPValue *Op) const override {
1768 "Op must be an operand of the recipe");
1769 return true;
1770 }
1771
1772protected:
1773#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1774 /// Print the recipe.
1775 void printRecipe(raw_ostream &O, const Twine &Indent,
1776 VPSlotTracker &SlotTracker) const override;
1777#endif
1778};
1779
1780/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1781/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1782/// allowed, and it is used to add a new incoming value for the single
1783/// predecessor VPBB.
1785 public VPPhiAccessors {
1787
1788 static inline bool classof(const VPRecipeBase *U) {
1789 auto *R = dyn_cast<VPIRInstruction>(U);
1790 return R && isa<PHINode>(R->getInstruction());
1791 }
1792
1793 static inline bool classof(const VPUser *U) {
1794 auto *R = dyn_cast<VPRecipeBase>(U);
1795 return R && classof(R);
1796 }
1797
1799
1800 void execute(VPTransformState &State) override;
1801
1802protected:
1803#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1804 /// Print the recipe.
1805 void printRecipe(raw_ostream &O, const Twine &Indent,
1806 VPSlotTracker &SlotTracker) const override;
1807#endif
1808
1809 const VPRecipeBase *getAsRecipe() const override { return this; }
1810};
1811
1812/// VPWidenRecipe is a recipe for producing a widened instruction using the
1813/// opcode and operands of the recipe. This recipe covers most of the
1814/// traditional vectorization cases where each recipe transforms into a
1815/// vectorized version of itself.
1817 public VPIRMetadata {
1818 unsigned Opcode;
1819
1820public:
1822 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1823 DebugLoc DL = {})
1824 : VPWidenRecipe(I.getOpcode(), Operands, Flags, Metadata, DL) {
1825 setUnderlyingValue(&I);
1826 }
1827
1828 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1829 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1830 DebugLoc DL = {})
1831 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands,
1832 computeScalarTypeForInstruction(Opcode, Operands),
1833 Flags, DL),
1834 VPIRMetadata(Metadata), Opcode(Opcode) {}
1835
1836 ~VPWidenRecipe() override = default;
1837
1839
1841 if (auto *UV = getUnderlyingValue())
1842 return new VPWidenRecipe(*cast<Instruction>(UV), NewOperands, *this,
1843 *this, getDebugLoc());
1844 return new VPWidenRecipe(Opcode, NewOperands, *this, *this, getDebugLoc());
1845 }
1846
1847 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1848
1849 /// Produce a widened instruction using the opcode and operands of the recipe,
1850 /// processing State.VF elements.
1851 void execute(VPTransformState &State) override;
1852
1853 /// Return the cost of this VPWidenRecipe.
1854 InstructionCost computeCost(ElementCount VF,
1855 VPCostContext &Ctx) const override;
1856
1857 unsigned getOpcode() const { return Opcode; }
1858
1859protected:
1860#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1861 /// Print the recipe.
1862 void printRecipe(raw_ostream &O, const Twine &Indent,
1863 VPSlotTracker &SlotTracker) const override;
1864#endif
1865
1866 /// Returns true if the recipe only uses the first lane of operand \p Op.
1867 bool usesFirstLaneOnly(const VPValue *Op) const override {
1869 "Op must be an operand of the recipe");
1870 return Opcode == Instruction::Select && Op == getOperand(0) &&
1871 Op->isDefinedOutsideLoopRegions();
1872 }
1873};
1874
1875/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1876/// TODO: Merge with VPWidenRecipe now that type is associated to every
1877/// VPRecipeValue.
1879 /// Cast instruction opcode.
1880 Instruction::CastOps Opcode;
1881
1882public:
1884 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1885 const VPIRMetadata &Metadata = {},
1887 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, ResultTy, Flags,
1888 DL),
1889 VPIRMetadata(Metadata), Opcode(Opcode) {
1890 assert(flagsValidForOpcode(Opcode) &&
1891 "Set flags not supported for the provided opcode");
1893 "Opcode requires specific flags to be set");
1895 }
1896
1897 ~VPWidenCastRecipe() override = default;
1898
1900 return new VPWidenCastRecipe(Opcode, getOperand(0), getScalarType(),
1902 *this, *this, getDebugLoc());
1903 }
1904
1905 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1906
1907 /// Produce widened copies of the cast.
1908 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1909
1910 /// Return the cost of this VPWidenCastRecipe.
1912 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1913
1914 Instruction::CastOps getOpcode() const { return Opcode; }
1915
1916protected:
1917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1918 /// Print the recipe.
1919 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1920 VPSlotTracker &SlotTracker) const override;
1921#endif
1922};
1923
1924/// A recipe for widening vector intrinsics.
1926 /// ID of the vector intrinsic to widen.
1927 Intrinsic::ID VectorIntrinsicID;
1928
1929 /// True if the intrinsic may read from memory.
1930 bool MayReadFromMemory;
1931
1932 /// True if the intrinsic may write to memory.
1933 bool MayWriteToMemory;
1934
1935 /// True if the intrinsic may have side-effects.
1936 bool MayHaveSideEffects;
1937
1938protected:
1939 VPWidenIntrinsicRecipe(const unsigned char SC,
1940 Intrinsic::ID VectorIntrinsicID,
1941 ArrayRef<VPValue *> CallArguments, Type *Ty,
1942 const VPIRFlags &Flags = {},
1943 const VPIRMetadata &MD = {},
1945 : VPRecipeWithIRFlags(SC, CallArguments, Ty, Flags, DL), VPIRMetadata(MD),
1946 VectorIntrinsicID(VectorIntrinsicID) {
1947 LLVMContext &Ctx = Ty->getContext();
1948 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1949 MemoryEffects ME = Attrs.getMemoryEffects();
1950 MayReadFromMemory = !ME.onlyWritesMemory();
1951 MayWriteToMemory = !ME.onlyReadsMemory();
1952 MayHaveSideEffects = MayWriteToMemory ||
1953 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1954 !Attrs.hasAttribute(Attribute::WillReturn);
1955 }
1956
1957 /// Helper function to produce the widened intrinsic call.
1958 CallInst *createVectorCall(VPTransformState &State);
1959
1960public:
1962 ArrayRef<VPValue *> CallArguments, Type *Ty,
1963 const VPIRFlags &Flags = {},
1964 const VPIRMetadata &MD = {},
1966 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, Ty,
1967 Flags, DL),
1968 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID),
1969 MayReadFromMemory(CI.mayReadFromMemory()),
1970 MayWriteToMemory(CI.mayWriteToMemory()),
1971 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1972 setUnderlyingValue(&CI);
1973 }
1974
1976 ArrayRef<VPValue *> CallArguments, Type *Ty,
1977 const VPIRFlags &Flags = {},
1978 const VPIRMetadata &Metadata = {},
1980 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenIntrinsicSC,
1981 VectorIntrinsicID, CallArguments, Ty, Flags,
1982 Metadata, DL) {}
1983
1984 ~VPWidenIntrinsicRecipe() override = default;
1985
1987 if (Value *CI = getUnderlyingValue())
1988 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1989 operands(), getScalarType(), *this,
1990 *this, getDebugLoc());
1991 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(),
1992 getScalarType(), *this, *this,
1993 getDebugLoc());
1994 }
1995
1996 static inline bool classof(const VPRecipeBase *R) {
1997 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1998 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC;
1999 }
2000
2001 static inline bool classof(const VPUser *U) {
2002 auto *R = dyn_cast<VPRecipeBase>(U);
2003 return R && classof(R);
2004 }
2005
2006 static inline bool classof(const VPValue *V) {
2007 auto *R = V->getDefiningRecipe();
2008 return R && classof(R);
2009 }
2010
2011 static inline bool classof(const VPSingleDefRecipe *R) {
2012 return classof(static_cast<const VPRecipeBase *>(R));
2013 }
2014
2015 /// Produce a widened version of the vector intrinsic.
2016 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
2017
2018 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
2021 const VPRecipeWithIRFlags &R,
2022 ElementCount VF, VPCostContext &Ctx);
2023
2024 /// Return the cost of this vector intrinsic.
2026 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
2027
2028 /// Return the ID of the intrinsic.
2029 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
2030
2031 /// Return the name of the intrinsic as a string.
2033
2034 /// Returns true if the intrinsic may read from memory.
2035 bool mayReadFromMemory() const { return MayReadFromMemory; }
2036
2037 /// Returns true if the intrinsic may write to memory.
2038 bool mayWriteToMemory() const { return MayWriteToMemory; }
2039
2040 /// Returns true if the intrinsic may have side-effects.
2041 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
2042
2043 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
2044
2045protected:
2046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2047 /// Print the recipe.
2048 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
2049 VPSlotTracker &SlotTracker) const override;
2050#endif
2051};
2052
2053/// A recipe for widening vector memory intrinsics.
2055 /// Alignment information for this memory access.
2056 Align Alignment;
2057
2058public:
2059 // TODO: support StoreInst for strided store
2061 ArrayRef<VPValue *> CallArguments, Type *Ty,
2062 Align Alignment, const VPIRMetadata &MD = {},
2064 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenMemIntrinsicSC,
2065 VectorIntrinsicID, CallArguments, Ty, {}, MD,
2066 DL),
2067 Alignment(Alignment) {
2068 assert(VectorIntrinsicID == Intrinsic::experimental_vp_strided_load &&
2069 "Unexpected intrinsic");
2070 }
2071
2072 ~VPWidenMemIntrinsicRecipe() override = default;
2073
2076 getScalarType(), Alignment, *this,
2077 getDebugLoc());
2078 }
2079
2080 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenMemIntrinsicSC)
2081
2082 /// Produce a widened version of the vector memory intrinsic.
2083 void execute(VPTransformState &State) override;
2084
2085 /// Helper function for computing the cost of vector memory intrinsic.
2087 bool IsMasked, Align Alignment,
2088 VPCostContext &Ctx);
2089
2090 /// Return the cost of this vector memory intrinsic.
2092 VPCostContext &Ctx) const override;
2093};
2094
2095/// A recipe for widening Call instructions using library calls.
2097 public VPIRMetadata {
2098 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
2099 /// between a given VF and the chosen vectorized variant, so there will be a
2100 /// different VPlan for each VF with a valid variant.
2101 Function *Variant;
2102
2103public:
2105 ArrayRef<VPValue *> CallArguments,
2106 const VPIRFlags &Flags = {},
2107 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2108 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments,
2109 toScalarizedTy(Variant->getReturnType()), Flags,
2110 DL),
2111 VPIRMetadata(Metadata), Variant(Variant) {
2112 setUnderlyingValue(UV);
2113 assert(
2114 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2115 "last operand must be the called function");
2116 assert(cast<Function>(CallArguments.back()->getLiveInIRValue())
2117 ->getReturnType() == getScalarType() &&
2118 "Scalar type must match return type of called scalar function");
2119 }
2120
2121 ~VPWidenCallRecipe() override = default;
2122
2124 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2125 *this, *this, getDebugLoc());
2126 }
2127
2128 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2129
2130 /// Produce a widened version of the call instruction.
2131 void execute(VPTransformState &State) override;
2132
2133 /// Return the cost of this VPWidenCallRecipe.
2134 InstructionCost computeCost(ElementCount VF,
2135 VPCostContext &Ctx) const override;
2136
2137 /// Return the cost of widening a call using the vector function \p Variant.
2138 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2139
2143
2146
2147 /// Returns true if the recipe only uses the first lane of operand \p Op.
2148 bool usesFirstLaneOnly(const VPValue *Op) const override;
2149
2150protected:
2151#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2152 /// Print the recipe.
2153 void printRecipe(raw_ostream &O, const Twine &Indent,
2154 VPSlotTracker &SlotTracker) const override;
2155#endif
2156};
2157
2158/// A recipe representing a sequence of load -> update -> store as part of
2159/// a histogram operation. This means there may be aliasing between vector
2160/// lanes, which is handled by the llvm.experimental.vector.histogram family
2161/// of intrinsics. The only update operations currently supported are
2162/// 'add' and 'sub' where the other term is loop-invariant.
2164 /// Opcode of the update operation, currently either add or sub.
2165 unsigned Opcode;
2166
2167public:
2168 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2169 const VPIRMetadata &Metadata = {},
2171 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2172 VPIRMetadata(Metadata), Opcode(Opcode) {}
2173
2174 ~VPHistogramRecipe() override = default;
2175
2177 return new VPHistogramRecipe(Opcode, operands(), *this, getDebugLoc());
2178 }
2179
2180 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2181
2182 /// Produce a vectorized histogram operation.
2183 void execute(VPTransformState &State) override;
2184
2185 /// Return the cost of this VPHistogramRecipe.
2187 VPCostContext &Ctx) const override;
2188
2189 unsigned getOpcode() const { return Opcode; }
2190
2191 /// Return the mask operand if one was provided, or a null pointer if all
2192 /// lanes should be executed unconditionally.
2193 VPValue *getMask() const {
2194 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2195 }
2196
2197protected:
2198#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2199 /// Print the recipe.
2200 void printRecipe(raw_ostream &O, const Twine &Indent,
2201 VPSlotTracker &SlotTracker) const override;
2202#endif
2203};
2204
2205/// A recipe for handling GEP instructions.
2207 Type *SourceElementTy;
2208
2209public:
2210 VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
2211 const VPIRFlags &Flags = {},
2213 GetElementPtrInst *UV = nullptr)
2214 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands,
2215 Operands[0]->getScalarType(), Flags, DL),
2216 SourceElementTy(SourceElementTy) {
2217 if (UV) {
2218 setUnderlyingValue(UV);
2221 assert(Metadata.empty() && "unexpected metadata on GEP");
2222 }
2223 }
2224
2225 ~VPWidenGEPRecipe() override = default;
2226
2232
2233 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2234
2235 /// This recipe generates a GEP instruction.
2236 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2237
2238 /// Generate the gep nodes.
2239 void execute(VPTransformState &State) override;
2240
2241 Type *getSourceElementType() const { return SourceElementTy; }
2242
2243 /// Return the cost of this VPWidenGEPRecipe.
2245 VPCostContext &Ctx) const override {
2246 // TODO: Compute accurate cost after retiring the legacy cost model.
2247 return 0;
2248 }
2249
2250 /// Returns true if the recipe only uses the first lane of operand \p Op.
2251 bool usesFirstLaneOnly(const VPValue *Op) const override;
2252
2253protected:
2254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2255 /// Print the recipe.
2256 void printRecipe(raw_ostream &O, const Twine &Indent,
2257 VPSlotTracker &SlotTracker) const override;
2258#endif
2259};
2260
2261/// A recipe to compute a pointer to the last element of each part of a widened
2262/// memory access for widened memory accesses of SourceElementTy. Used for
2263/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2264/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2265/// unroller otherwise.
2267 Type *SourceElementTy;
2268
2269 /// The constant stride of the pointer computed by this recipe, expressed in
2270 /// units of SourceElementTy.
2271 int64_t Stride;
2272
2273public:
2274 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2275 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2276 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2277 Ptr->getScalarType(), GEPFlags, DL),
2278 SourceElementTy(SourceElementTy), Stride(Stride) {
2279 assert(Stride < 0 && "Stride must be negative");
2280 }
2281
2282 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2283
2284 Type *getSourceElementType() const { return SourceElementTy; }
2285 int64_t getStride() const { return Stride; }
2286 VPValue *getPointer() const { return getOperand(0); }
2287 VPValue *getVFValue() const { return getOperand(1); }
2289 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2290 }
2291
2292 /// Adds the offset operand to the recipe.
2293 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2294 void materializeOffset(unsigned Part = 0);
2295
2296 /// Append \p Offset as the offset operand. The offset is an integer index
2297 /// expressed in units of SourceElementTy.
2299 assert(Offset->getScalarType()->isIntegerTy() &&
2300 "offset must be an integer index");
2302 }
2303
2304 void execute(VPTransformState &State) override;
2305
2306 bool usesFirstLaneOnly(const VPValue *Op) const override {
2308 "Op must be an operand of the recipe");
2309 return true;
2310 }
2311
2312 /// Return the cost of this VPVectorEndPointerRecipe.
2314 VPCostContext &Ctx) const override {
2315 // TODO: Compute accurate cost after retiring the legacy cost model.
2316 return 0;
2317 }
2318
2319 /// Returns true if the recipe only uses the first part of operand \p Op.
2320 bool usesFirstPartOnly(const VPValue *Op) const override {
2322 "Op must be an operand of the recipe");
2323 assert(getNumOperands() <= 2 && "must have at most two operands");
2324 return true;
2325 }
2326
2328 auto *VEPR = new VPVectorEndPointerRecipe(
2331 if (auto *Offset = getOffset())
2332 VEPR->addOffset(Offset);
2333 return VEPR;
2334 }
2335
2336protected:
2337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2338 /// Print the recipe.
2339 void printRecipe(raw_ostream &O, const Twine &Indent,
2340 VPSlotTracker &SlotTracker) const override;
2341#endif
2342};
2343
2344/// A recipe to compute the pointers for widened memory accesses of \p
2345/// SourceElementTy, with the \p Stride expressed in units of \p
2346/// SourceElementTy. Unrolling adds an extra \p VFxPart operand for unrolled
2347/// parts > 0 and it produces `GEP SourceElementTy Ptr, VFxPart * Stride`.
2349 Type *SourceElementTy;
2350
2351public:
2352 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
2353 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2354 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC,
2355 ArrayRef<VPValue *>({Ptr, Stride}),
2356 Ptr->getScalarType(), GEPFlags, DL),
2357 SourceElementTy(SourceElementTy) {}
2358
2359 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2360
2361 VPValue *getStride() const { return getOperand(1); }
2362
2364 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2365 }
2366
2367 /// Add the per-part offset (VFxPart) used for unrolled parts > 0.
2368 void addPerPartOffset(VPValue *VFxPart) {
2369 assert(VFxPart->getScalarType()->isIntegerTy() &&
2370 "per-part offset must be an integer index");
2371 VPUser::addOperand(VFxPart);
2372 }
2373
2374 void execute(VPTransformState &State) override;
2375
2376 Type *getSourceElementType() const { return SourceElementTy; }
2377
2378 bool usesFirstLaneOnly(const VPValue *Op) const override {
2380 "Op must be an operand of the recipe");
2381 return true;
2382 }
2383
2384 /// Returns true if the recipe only uses the first part of operand \p Op.
2385 bool usesFirstPartOnly(const VPValue *Op) const override {
2387 "Op must be an operand of the recipe");
2388 assert(getNumOperands() <= 2 && "must have at most two operands");
2389 return true;
2390 }
2391
2393 auto *Clone =
2394 new VPVectorPointerRecipe(getOperand(0), SourceElementTy, getStride(),
2396 if (auto *VFxPart = getVFxPart())
2397 Clone->addPerPartOffset(VFxPart);
2398 return Clone;
2399 }
2400
2401 /// Return the cost of this VPVectorPointerRecipe.
2403 VPCostContext &Ctx) const override {
2404 // TODO: Compute accurate cost after retiring the legacy cost model.
2405 return 0;
2406 }
2407
2408protected:
2409#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2410 /// Print the recipe.
2411 void printRecipe(raw_ostream &O, const Twine &Indent,
2412 VPSlotTracker &SlotTracker) const override;
2413#endif
2414};
2415
2416/// A pure virtual base class for all recipes modeling header phis, including
2417/// phis for first order recurrences, pointer inductions and reductions. The
2418/// start value is the first operand of the recipe and the incoming value from
2419/// the backedge is the second operand.
2420///
2421/// Inductions are modeled using the following sub-classes:
2422/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2423/// floating point inductions with arbitrary start and step values. Produces
2424/// a vector PHI per-part.
2425/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2426/// pointer induction. Produces either a vector PHI per-part or scalar values
2427/// per-lane based on the canonical induction.
2428/// * VPFirstOrderRecurrencePHIRecipe
2429/// * VPReductionPHIRecipe
2430/// * VPActiveLaneMaskPHIRecipe
2431/// * VPEVLBasedIVPHIRecipe
2432///
2433/// Note that the canonical IV is modeled as a VPRegionValue associated with
2434/// its loop region.
2436 public VPPhiAccessors {
2437protected:
2438 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2439 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2440 : VPHeaderPHIRecipe(VPRecipeID, UnderlyingInstr, Start,
2441 Start->getScalarType(), DL) {}
2442
2443 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2444 VPValue *Start, Type *ResultTy, DebugLoc DL)
2445 : VPSingleDefRecipe(VPRecipeID, Start, ResultTy, UnderlyingInstr, DL) {}
2446
2447 const VPRecipeBase *getAsRecipe() const override { return this; }
2448
2449public:
2450 ~VPHeaderPHIRecipe() override = default;
2451
2452 /// Method to support type inquiry through isa, cast, and dyn_cast.
2453 static inline bool classof(const VPRecipeBase *R) {
2454 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2455 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2456 }
2457 static inline bool classof(const VPValue *V) {
2458 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2459 }
2460 static inline bool classof(const VPSingleDefRecipe *R) {
2461 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2462 }
2463
2464 /// Generate the phi nodes.
2465 void execute(VPTransformState &State) override = 0;
2466
2467 /// Return the cost of this header phi recipe.
2469 VPCostContext &Ctx) const override;
2470
2471 /// Returns the start value of the phi, if one is set.
2473 return getNumOperands() == 0 ? nullptr : getOperand(0);
2474 }
2476 return getNumOperands() == 0 ? nullptr : getOperand(0);
2477 }
2478
2479 /// Update the start value of the recipe.
2481
2482 /// Returns the incoming value from the loop backedge.
2484 return getOperand(1);
2485 }
2486
2487 /// Update the incoming value from the loop backedge.
2489
2490 /// Add \p V as the incoming value from the loop backedge.
2492 assert(getNumOperands() == 1 &&
2493 "backedge value must be appended right after construction");
2494 assert(V->getScalarType() == getScalarType() &&
2495 "backedge value must have the same type as the start value");
2497 }
2498
2499 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2500 /// to be a recipe.
2502 return *getBackedgeValue()->getDefiningRecipe();
2503 }
2504
2505protected:
2506#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2507 /// Print the recipe.
2508 void printRecipe(raw_ostream &O, const Twine &Indent,
2509 VPSlotTracker &SlotTracker) const override = 0;
2510#endif
2511};
2512
2513/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2514/// VPWidenPointerInductionRecipe), providing shared functionality, including
2515/// retrieving the step value, induction descriptor and original phi node.
2517 InductionDescriptor IndDesc;
2518
2519public:
2520 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2521 VPValue *Step, const InductionDescriptor &IndDesc,
2522 DebugLoc DL)
2523 : VPWidenInductionRecipe(Kind, IV, Start, Step, IndDesc,
2524 Start->getScalarType(), DL) {}
2525
2526 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2527 VPValue *Step, const InductionDescriptor &IndDesc,
2528 Type *ResultTy, DebugLoc DL)
2529 : VPHeaderPHIRecipe(Kind, IV, Start, ResultTy, DL), IndDesc(IndDesc) {
2530 addOperand(Step);
2531 }
2532
2533 /// After unrolling, append the splat-VF step (`VF * step`) and the value of
2534 /// the induction at the last unrolled part.
2535 void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart) {
2536 assert(LastPart->getScalarType() == getScalarType() &&
2537 "last-part value must match the induction recipe's scalar type");
2539 ? SplatVFStep->getScalarType()->isIntegerTy()
2540 : SplatVFStep->getScalarType() == getScalarType()) &&
2541 "splat-step must match the induction type for non-pointer "
2542 "inductions, or be an integer index for pointer inductions");
2543 VPUser::addOperand(SplatVFStep);
2544 VPUser::addOperand(LastPart);
2545 }
2546
2547 static inline bool classof(const VPRecipeBase *R) {
2548 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2549 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2550 }
2551
2552 static inline bool classof(const VPValue *V) {
2553 auto *R = V->getDefiningRecipe();
2554 return R && classof(R);
2555 }
2556
2557 static inline bool classof(const VPSingleDefRecipe *R) {
2558 return classof(static_cast<const VPRecipeBase *>(R));
2559 }
2560
2561 void execute(VPTransformState &State) override = 0;
2562
2563 /// Returns the start value of the induction.
2565
2566 /// Returns the step value of the induction.
2568 const VPValue *getStepValue() const { return getOperand(1); }
2569
2570 /// Update the step value of the recipe.
2571 void setStepValue(VPValue *V) { setOperand(1, V); }
2572
2574 const VPValue *getVFValue() const { return getOperand(2); }
2575
2576 /// Returns the number of incoming values, also number of incoming blocks.
2577 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2578 /// incoming value, its start value.
2579 unsigned getNumIncoming() const override { return 1; }
2580
2581 /// Returns the underlying PHINode if one exists, or null otherwise.
2585
2586 /// Returns the induction descriptor for the recipe.
2587 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2588
2589 /// Returns the SCEV predicates associated with this induction.
2591 return IndDesc.getNoWrapPredicates();
2592 }
2593
2595 // TODO: All operands of base recipe must exist and be at same index in
2596 // derived recipe.
2598 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2599 }
2600
2602 // TODO: All operands of base recipe must exist and be at same index in
2603 // derived recipe.
2605 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2606 }
2607
2608 /// Returns true if the recipe only uses the first lane of operand \p Op.
2609 bool usesFirstLaneOnly(const VPValue *Op) const override {
2611 "Op must be an operand of the recipe");
2612 // The recipe creates its own wide start value, so it only requests the
2613 // first lane of the operand.
2614 // TODO: Remove once creating the start value is modeled separately.
2615 return Op == getStartValue() || Op == getStepValue();
2616 }
2617};
2618
2619/// A recipe for handling phi nodes of integer and floating-point inductions,
2620/// producing their vector values. This is an abstract recipe and must be
2621/// converted to concrete recipes before executing.
2623 public VPIRFlags {
2624 TruncInst *Trunc;
2625
2626 // If this recipe is unrolled it will have 2 additional operands.
2627 bool isUnrolled() const { return getNumOperands() == 5; }
2628
2629public:
2631 VPValue *VF, const InductionDescriptor &IndDesc,
2632 const VPIRFlags &Flags, DebugLoc DL)
2633 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2634 Start, Step, IndDesc, DL),
2635 VPIRFlags(Flags), Trunc(nullptr) {
2636 addOperand(VF);
2637 }
2638
2640 VPValue *VF, const InductionDescriptor &IndDesc,
2641 TruncInst *Trunc, const VPIRFlags &Flags,
2642 DebugLoc DL)
2643 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2644 Start, Step, IndDesc,
2645 Trunc ? Trunc->getType() : Start->getType(), DL),
2646 VPIRFlags(Flags), Trunc(Trunc) {
2647 addOperand(VF);
2649 if (Trunc)
2651 assert(Metadata.empty() && "unexpected metadata on Trunc");
2652 }
2653
2655
2661
2662 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2663
2664 void execute(VPTransformState &State) override {
2665 llvm_unreachable("cannot execute this recipe, should be expanded via "
2666 "expandVPWidenIntOrFpInductionRecipe");
2667 }
2668
2669 /// Returns the start value of the induction.
2671
2672 /// If the recipe has been unrolled, return the VPValue for the induction
2673 /// increment, otherwise return null.
2675 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2676 }
2677
2678 /// Returns the number of incoming values, also number of incoming blocks.
2679 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2680 /// incoming value, its start value.
2681 unsigned getNumIncoming() const override { return 1; }
2682
2683 /// Returns the first defined value as TruncInst, if it is one or nullptr
2684 /// otherwise.
2685 TruncInst *getTruncInst() { return Trunc; }
2686 const TruncInst *getTruncInst() const { return Trunc; }
2687
2688 /// Returns true if the induction is canonical, i.e. starting at 0 and
2689 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2690 /// same type as the canonical induction.
2691 bool isCanonical() const;
2692
2693 /// Returns the VPValue representing the value of this induction at
2694 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2695 /// take place.
2697 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2698 }
2699
2700protected:
2701#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2702 /// Print the recipe.
2703 void printRecipe(raw_ostream &O, const Twine &Indent,
2704 VPSlotTracker &SlotTracker) const override;
2705#endif
2706};
2707
2709public:
2710 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2711 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2712 /// VF*UF.
2714 VPValue *NumUnrolledElems,
2715 const InductionDescriptor &IndDesc, DebugLoc DL)
2716 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2717 Start, Step, IndDesc, DL) {
2718 addOperand(NumUnrolledElems);
2719 }
2720
2722
2728
2729 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2730
2731 /// Generate vector values for the pointer induction.
2732 void execute(VPTransformState &State) override {
2733 llvm_unreachable("cannot execute this recipe, should be expanded via "
2734 "expandVPWidenPointerInduction");
2735 };
2736
2737 /// Returns true if only scalar values will be generated.
2738 bool onlyScalarsGenerated(bool IsScalable);
2739
2740protected:
2741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2742 /// Print the recipe.
2743 void printRecipe(raw_ostream &O, const Twine &Indent,
2744 VPSlotTracker &SlotTracker) const override;
2745#endif
2746};
2747
2748/// A recipe for widened phis. Incoming values are operands of the recipe and
2749/// their operand index corresponds to the incoming predecessor block. If the
2750/// recipe is placed in an entry block to a (non-replicate) region, it must have
2751/// exactly 2 incoming values, the first from the predecessor of the region and
2752/// the second from the exiting block of the region.
2754 public VPPhiAccessors {
2755 /// Name to use for the generated IR instruction for the widened phi.
2756 std::string Name;
2757
2758public:
2759 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2760 /// debug location \p DL and \p Name.
2762 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2763 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues,
2764 IncomingValues[0]->getScalarType(),
2765 /*UV=*/nullptr, DL),
2766 Name(Name.str()) {
2767 assert(all_of(IncomingValues,
2768 [this](VPValue *VPV) {
2769 return VPV->getScalarType() == getScalarType();
2770 }) &&
2771 "all incoming values must have the same type");
2772 }
2773
2775 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2776 }
2777
2778 ~VPWidenPHIRecipe() override = default;
2779
2780 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2781
2782 /// Generate the phi/select nodes.
2783 void execute(VPTransformState &State) override;
2784
2785 /// Return the cost of this VPWidenPHIRecipe.
2787 VPCostContext &Ctx) const override;
2788
2789protected:
2790#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2791 /// Print the recipe.
2792 void printRecipe(raw_ostream &O, const Twine &Indent,
2793 VPSlotTracker &SlotTracker) const override;
2794#endif
2795
2796 const VPRecipeBase *getAsRecipe() const override { return this; }
2797};
2798
2799/// A recipe for handling first-order recurrence phis. The start value is the
2800/// first operand of the recipe and the incoming value from the backedge is the
2801/// second operand.
2804 VPValue &BackedgeValue)
2805 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2806 &Start) {
2807 addOperand(&BackedgeValue);
2808 }
2809
2810 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2811
2816
2817 void execute(VPTransformState &State) override;
2818
2819 /// Return the cost of this first-order recurrence phi recipe.
2821 VPCostContext &Ctx) const override;
2822
2823 /// Returns true if the recipe only uses the first lane of operand \p Op.
2824 bool usesFirstLaneOnly(const VPValue *Op) const override {
2826 "Op must be an operand of the recipe");
2827 return Op == getStartValue();
2828 }
2829
2830protected:
2831#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2832 /// Print the recipe.
2833 void printRecipe(raw_ostream &O, const Twine &Indent,
2834 VPSlotTracker &SlotTracker) const override;
2835#endif
2836};
2837
2838/// Possible variants of a reduction.
2839
2840/// This reduction is ordered and in-loop.
2841struct RdxOrdered {};
2842/// This reduction is in-loop.
2843struct RdxInLoop {};
2844/// This reduction is unordered with the partial result scaled down by some
2845/// factor.
2848};
2849using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2850
2851inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2852 unsigned ScaleFactor) {
2853 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2854 if (Ordered)
2855 return RdxOrdered{};
2856 if (InLoop)
2857 return RdxInLoop{};
2858 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2859}
2860
2861/// A recipe for handling reduction phis. The start value is the first operand
2862/// of the recipe and the incoming value from the backedge is the second
2863/// operand.
2865 /// The recurrence kind of the reduction.
2866 const RecurKind Kind;
2867
2868 ReductionStyle Style;
2869
2870 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2871 /// patterns for argmin/argmax).
2872 /// TODO: Also support cases where the phi itself has a single use, but its
2873 /// compare has multiple uses.
2874 bool HasUsesOutsideReductionChain;
2875
2876public:
2877 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2879 VPValue &BackedgeValue, ReductionStyle Style,
2880 const VPIRFlags &Flags,
2881 bool HasUsesOutsideReductionChain = false)
2882 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2883 VPIRFlags(Flags), Kind(Kind), Style(Style),
2884 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2885 addOperand(&BackedgeValue);
2886 }
2887
2888 ~VPReductionPHIRecipe() override = default;
2889
2891 VPValue *BackedgeValue) {
2892 return new VPReductionPHIRecipe(
2894 *Start, *BackedgeValue, Style, *this, HasUsesOutsideReductionChain);
2895 }
2896
2900
2901 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2902
2903 /// Generate the phi/select nodes.
2904 void execute(VPTransformState &State) override;
2905
2906 /// Get the factor that the VF of this recipe's output should be scaled by, or
2907 /// 1 if it isn't scaled.
2908 unsigned getVFScaleFactor() const {
2909 auto *Partial = std::get_if<RdxUnordered>(&Style);
2910 return Partial ? Partial->VFScaleFactor : 1;
2911 }
2912
2913 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2914 /// > 1.
2915 void setVFScaleFactor(unsigned ScaleFactor) {
2916 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2917 Style = RdxUnordered{ScaleFactor};
2918 }
2919
2920 /// Returns the number of incoming values, also number of incoming blocks.
2921 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2922 /// incoming value, its start value.
2923 unsigned getNumIncoming() const override { return 2; }
2924
2925 /// Returns the recurrence kind of the reduction.
2926 RecurKind getRecurrenceKind() const { return Kind; }
2927
2928 /// Returns true, if the phi is part of an ordered reduction.
2929 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2930
2931 /// Returns true if the phi is part of an in-loop reduction.
2932 bool isInLoop() const {
2933 return std::holds_alternative<RdxInLoop>(Style) ||
2934 std::holds_alternative<RdxOrdered>(Style);
2935 }
2936
2937 /// Returns true if the reduction outputs a vector with a scaled down VF.
2938 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2939
2940 /// Returns true, if the phi is part of a multi-use reduction.
2942 return HasUsesOutsideReductionChain;
2943 }
2944
2945 /// Returns true if the recipe only uses the first lane of operand \p Op.
2946 bool usesFirstLaneOnly(const VPValue *Op) const override {
2948 "Op must be an operand of the recipe");
2949 return isOrdered() || isInLoop();
2950 }
2951
2952protected:
2953#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2954 /// Print the recipe.
2955 void printRecipe(raw_ostream &O, const Twine &Indent,
2956 VPSlotTracker &SlotTracker) const override;
2957#endif
2958};
2959
2960/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2961/// instructions.
2963public:
2964 /// The blend operation is a User of the incoming values and of their
2965 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2966 /// be omitted (implied by passing an odd number of operands) in which case
2967 /// all other incoming values are merged into it.
2969 const VPIRFlags &Flags, DebugLoc DL)
2970 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands,
2971 Operands[0]->getScalarType(), Flags, DL) {
2972 assert(Operands.size() >= 2 && "Expected at least two operands!");
2974 [this](unsigned I) {
2975 return getIncomingValue(I)->getScalarType() ==
2976 getScalarType();
2977 }) &&
2978 "all incoming values must have the same type");
2980 [this](unsigned I) {
2981 return getMask(I)->getScalarType()->isIntegerTy(1);
2982 }) &&
2983 "masks must be a bool");
2984 setUnderlyingValue(Phi);
2985 }
2986
2988
2991 NewOperands, *this, getDebugLoc());
2992 }
2993
2994 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2995
2996 /// A normalized blend is one that has an odd number of operands, whereby the
2997 /// first operand does not have an associated mask.
2998 bool isNormalized() const { return getNumOperands() % 2; }
2999
3000 /// Return the number of incoming values, taking into account when normalized
3001 /// the first incoming value will have no mask.
3002 unsigned getNumIncomingValues() const {
3003 return (getNumOperands() + isNormalized()) / 2;
3004 }
3005
3006 /// Return incoming value number \p Idx.
3007 VPValue *getIncomingValue(unsigned Idx) const {
3008 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
3009 }
3010
3011 /// Return mask number \p Idx.
3012 VPValue *getMask(unsigned Idx) const {
3013 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3014 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
3015 }
3016
3017 /// Set mask number \p Idx to \p V.
3018 void setMask(unsigned Idx, VPValue *V) {
3019 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3020 assert(V->getScalarType()->isIntegerTy(1) && "Mask must be an i1 (vector)");
3021 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
3022 }
3023
3024 void execute(VPTransformState &State) override {
3025 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
3026 }
3027
3028 /// Return the cost of this VPBlendRecipe.
3029 InstructionCost computeCost(ElementCount VF,
3030 VPCostContext &Ctx) const override;
3031
3032 /// Returns true if the recipe only uses the first lane of operand \p Op.
3033 bool usesFirstLaneOnly(const VPValue *Op) const override;
3034
3035protected:
3036#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3037 /// Print the recipe.
3038 void printRecipe(raw_ostream &O, const Twine &Indent,
3039 VPSlotTracker &SlotTracker) const override;
3040#endif
3041};
3042
3043/// A common base class for interleaved memory operations.
3044/// An Interleaved memory operation is a memory access method that combines
3045/// multiple strided loads/stores into a single wide load/store with shuffles.
3046/// The first operand is the start address. The optional operands are, in order,
3047/// the stored values and the mask.
3049 public VPIRMetadata {
3051
3052 /// Indicates if the interleave group is in a conditional block and requires a
3053 /// mask.
3054 bool HasMask = false;
3055
3056 /// Indicates if gaps between members of the group need to be masked out or if
3057 /// unused gaps can be loaded speculatively.
3058 bool NeedsMaskForGaps = false;
3059
3060protected:
3061 VPInterleaveBase(const unsigned char SC,
3063 ArrayRef<VPValue *> Operands,
3064 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3065 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3066 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
3067 NeedsMaskForGaps(NeedsMaskForGaps) {
3068 // TODO: extend the masked interleaved-group support to reversed access.
3069 assert((!Mask || !IG->isReverse()) &&
3070 "Reversed masked interleave-group not supported.");
3071 if (StoredValues.empty()) {
3072 for (Instruction *Inst : IG->members()) {
3073 assert(!Inst->getType()->isVoidTy() && "must have result");
3074 new VPMultiDefValue(this, Inst, Inst->getType());
3075 }
3076 } else {
3077 for (auto *SV : StoredValues)
3078 addOperand(SV);
3079 }
3080 if (Mask) {
3081 HasMask = true;
3082 addOperand(Mask);
3083 }
3084 }
3085
3086public:
3087 VPInterleaveBase *clone() override = 0;
3088
3089 static inline bool classof(const VPRecipeBase *R) {
3090 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
3091 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
3092 }
3093
3094 static inline bool classof(const VPUser *U) {
3095 auto *R = dyn_cast<VPRecipeBase>(U);
3096 return R && classof(R);
3097 }
3098
3099 /// Return the address accessed by this recipe.
3100 VPValue *getAddr() const {
3101 return getOperand(0); // Address is the 1st, mandatory operand.
3102 }
3103
3104 /// Return the mask used by this recipe. Note that a full mask is represented
3105 /// by a nullptr.
3106 VPValue *getMask() const {
3107 // Mask is optional and the last operand.
3108 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
3109 }
3110
3111 /// Return true if the access needs a mask because of the gaps.
3112 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
3113
3115
3116 Instruction *getInsertPos() const { return IG->getInsertPos(); }
3117
3118 void execute(VPTransformState &State) override {
3119 llvm_unreachable("VPInterleaveBase should not be instantiated.");
3120 }
3121
3122 /// Return the cost of this recipe.
3123 InstructionCost computeCost(ElementCount VF,
3124 VPCostContext &Ctx) const override;
3125
3126 /// Returns true if the recipe only uses the first lane of operand \p Op.
3127 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
3128
3129 /// Returns the number of stored operands of this interleave group. Returns 0
3130 /// for load interleave groups.
3131 virtual unsigned getNumStoreOperands() const = 0;
3132
3133 /// Return the VPValues stored by this interleave group. If it is a load
3134 /// interleave group, return an empty ArrayRef.
3136 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
3138 }
3139};
3140
3141/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
3142/// or stores into one wide load/store and shuffles. The first operand of a
3143/// VPInterleave recipe is the address, followed by the stored values, followed
3144/// by an optional mask.
3146public:
3148 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3149 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3150 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
3151 Mask, NeedsMaskForGaps, MD, DL) {}
3152
3153 ~VPInterleaveRecipe() override = default;
3154
3158 needsMaskForGaps(), *this, getDebugLoc());
3159 }
3160
3161 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
3162
3163 /// Generate the wide load or store, and shuffles.
3164 void execute(VPTransformState &State) override;
3165
3166 bool usesFirstLaneOnly(const VPValue *Op) const override {
3168 "Op must be an operand of the recipe");
3169 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
3170 }
3171
3172 unsigned getNumStoreOperands() const override {
3173 return getNumOperands() - (getMask() ? 2 : 1);
3174 }
3175
3176protected:
3177#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3178 /// Print the recipe.
3179 void printRecipe(raw_ostream &O, const Twine &Indent,
3180 VPSlotTracker &SlotTracker) const override;
3181#endif
3182};
3183
3184/// A recipe for interleaved memory operations with vector-predication
3185/// intrinsics. The first operand is the address, the second operand is the
3186/// explicit vector length. Stored values and mask are optional operands.
3188public:
3190 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3191 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3192 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3193 R.getDebugLoc()) {
3194 assert(!getInterleaveGroup()->isReverse() &&
3195 "Reversed interleave-group with tail folding is not supported.");
3196 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3197 "supported for scalable vector.");
3198 }
3199
3200 ~VPInterleaveEVLRecipe() override = default;
3201
3203 llvm_unreachable("cloning not implemented yet");
3204 }
3205
3206 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3207
3208 /// The VPValue of the explicit vector length.
3209 VPValue *getEVL() const { return getOperand(1); }
3210
3211 /// Generate the wide load or store, and shuffles.
3212 void execute(VPTransformState &State) override;
3213
3214 /// The recipe only uses the first lane of the address, and EVL operand.
3215 bool usesFirstLaneOnly(const VPValue *Op) const override {
3217 "Op must be an operand of the recipe");
3218 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3219 Op == getEVL();
3220 }
3221
3222 unsigned getNumStoreOperands() const override {
3223 return getNumOperands() - (getMask() ? 3 : 2);
3224 }
3225
3226protected:
3227#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3228 /// Print the recipe.
3229 void printRecipe(raw_ostream &O, const Twine &Indent,
3230 VPSlotTracker &SlotTracker) const override;
3231#endif
3232};
3233
3234/// A recipe to represent inloop, ordered or partial reduction operations. It
3235/// performs a reduction on a vector operand into a scalar (vector in the case
3236/// of a partial reduction) value, and adds the result to a chain. The Operands
3237/// are {ChainOp, VecOp, [Condition]}.
3239
3240 /// The recurrence kind for the reduction in question.
3241 RecurKind RdxKind;
3242 /// Whether the reduction is conditional.
3243 bool IsConditional = false;
3244 ReductionStyle Style;
3245
3246protected:
3247 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3249 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3250 ReductionStyle Style, DebugLoc DL)
3251 : VPRecipeWithIRFlags(SC, Operands, Operands[0]->getScalarType(), FMFs,
3252 DL),
3253 RdxKind(RdxKind), Style(Style) {
3254 assert(all_of(Operands,
3255 [this](VPValue *VPV) {
3256 return VPV->getScalarType() == getScalarType() ||
3257 (isa<VPInstruction>(VPV) &&
3258 cast<VPInstruction>(VPV)->getOpcode() ==
3260 }) &&
3261 "all incoming values must have the same type");
3262 if (CondOp) {
3263 assert(CondOp->getScalarType()->isIntegerTy(1) &&
3264 "CondOp must be a bool");
3265 IsConditional = true;
3266 addOperand(CondOp);
3267 }
3269 }
3270
3271public:
3273 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3275 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3276 {ChainOp, VecOp}, CondOp, Style, DL) {}
3277
3279 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3281 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3282 {ChainOp, VecOp}, CondOp, Style, DL) {}
3283
3284 ~VPReductionRecipe() override = default;
3285
3287 return new VPReductionRecipe(RdxKind, getFastMathFlagsOrNone(),
3289 getCondOp(), Style, getDebugLoc());
3290 }
3291
3292 static inline bool classof(const VPRecipeBase *R) {
3293 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3294 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3295 }
3296
3297 static inline bool classof(const VPUser *U) {
3298 auto *R = dyn_cast<VPRecipeBase>(U);
3299 return R && classof(R);
3300 }
3301
3302 static inline bool classof(const VPValue *VPV) {
3303 const VPRecipeBase *R = VPV->getDefiningRecipe();
3304 return R && classof(R);
3305 }
3306
3307 static inline bool classof(const VPSingleDefRecipe *R) {
3308 return classof(static_cast<const VPRecipeBase *>(R));
3309 }
3310
3311 /// Generate the reduction in the loop.
3312 void execute(VPTransformState &State) override;
3313
3314 /// Return the cost of VPReductionRecipe.
3315 InstructionCost computeCost(ElementCount VF,
3316 VPCostContext &Ctx) const override;
3317
3318 /// Return the recurrence kind for the in-loop reduction.
3319 RecurKind getRecurrenceKind() const { return RdxKind; }
3320 /// Return true if the in-loop reduction is ordered.
3321 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3322 /// Return true if the in-loop reduction is conditional.
3323 bool isConditional() const { return IsConditional; };
3324 /// Returns true if the reduction outputs a vector with a scaled down VF.
3325 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3326 /// Returns true if the reduction is in-loop.
3327 bool isInLoop() const {
3328 return std::holds_alternative<RdxInLoop>(Style) ||
3329 std::holds_alternative<RdxOrdered>(Style);
3330 }
3331 /// The VPValue of the scalar Chain being accumulated.
3332 VPValue *getChainOp() const { return getOperand(0); }
3333 /// The VPValue of the vector value to be reduced.
3334 VPValue *getVecOp() const { return getOperand(1); }
3335 /// The VPValue of the condition for the block.
3337 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3338 }
3339 /// Get the factor that the VF of this recipe's output should be scaled by, or
3340 /// 1 if it isn't scaled.
3341 unsigned getVFScaleFactor() const {
3342 auto *Partial = std::get_if<RdxUnordered>(&Style);
3343 return Partial ? Partial->VFScaleFactor : 1;
3344 }
3345
3346protected:
3347#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3348 /// Print the recipe.
3349 void printRecipe(raw_ostream &O, const Twine &Indent,
3350 VPSlotTracker &SlotTracker) const override;
3351#endif
3352};
3353
3354/// A recipe to represent inloop reduction operations with vector-predication
3355/// intrinsics, performing a reduction on a vector operand with the explicit
3356/// vector length (EVL) into a scalar value, and adding the result to a chain.
3357/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3359public:
3362 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3365 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3366 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3367 DL) {}
3368
3369 ~VPReductionEVLRecipe() override = default;
3370
3372 llvm_unreachable("cloning not implemented yet");
3373 }
3374
3375 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3376
3377 /// Generate the reduction in the loop
3378 void execute(VPTransformState &State) override;
3379
3380 /// The VPValue of the explicit vector length.
3381 VPValue *getEVL() const { return getOperand(2); }
3382
3383 /// Returns true if the recipe only uses the first lane of operand \p Op.
3384 bool usesFirstLaneOnly(const VPValue *Op) const override {
3386 "Op must be an operand of the recipe");
3387 return Op == getEVL();
3388 }
3389
3390protected:
3391#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3392 /// Print the recipe.
3393 void printRecipe(raw_ostream &O, const Twine &Indent,
3394 VPSlotTracker &SlotTracker) const override;
3395#endif
3396};
3397
3398/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3399/// copies of the original scalar type, one per lane, instead of producing a
3400/// single copy of widened type for all lanes. If the instruction is known to be
3401/// a single scalar, only one copy will be generated.
3403 public VPIRMetadata {
3404 /// Indicator if only a single replica per lane is needed.
3405 bool IsSingleScalar;
3406
3407 /// Indicator if the replicas are also predicated.
3408 bool IsPredicated;
3409
3410public:
3412 bool IsSingleScalar, VPValue *Mask = nullptr,
3413 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3414 DebugLoc DL = DebugLoc::getUnknown())
3415 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands,
3416 computeScalarType(I, Operands), Flags, DL),
3417 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3418 IsPredicated(Mask) {
3419 assert((!IsSingleScalar || !I->isCast()) &&
3420 "single-scalar casts should use VPInstructionWithType");
3421 setUnderlyingValue(I);
3422 if (Mask)
3423 addOperand(Mask);
3424 }
3425
3426 ~VPReplicateRecipe() override = default;
3427
3428 /// Compute the scalar result type for a VPReplicateRecipe wrapping \p I with
3429 /// \p Operands (excluding any predicate mask).
3430 static Type *computeScalarType(const Instruction *I,
3431 ArrayRef<VPValue *> Operands);
3432
3434
3436 auto *Copy = new VPReplicateRecipe(
3437 getUnderlyingInstr(), NewOperands, IsSingleScalar,
3438 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3439 Copy->transferFlags(*this);
3440 return Copy;
3441 }
3442
3443 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3444
3445 /// Generate replicas of the desired Ingredient. Replicas will be generated
3446 /// for all parts and lanes unless a specific part and lane are specified in
3447 /// the \p State.
3448 void execute(VPTransformState &State) override;
3449
3450 /// Return the cost of this VPReplicateRecipe.
3451 InstructionCost computeCost(ElementCount VF,
3452 VPCostContext &Ctx) const override;
3453
3454 /// Return the cost of scalarizing a call to \p CalledFn with argument
3455 /// operands \p ArgOps for a given \p VF.
3456 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
3458 bool IsSingleScalar, ElementCount VF,
3459 VPCostContext &Ctx);
3460
3461 bool isSingleScalar() const { return IsSingleScalar; }
3462
3463 bool isPredicated() const { return IsPredicated; }
3464
3465 /// Returns true if the recipe only uses the first lane of operand \p Op.
3466 bool usesFirstLaneOnly(const VPValue *Op) const override {
3468 "Op must be an operand of the recipe");
3469 return isSingleScalar();
3470 }
3471
3472 /// Returns true if the recipe uses scalars of operand \p Op.
3473 bool usesScalars(const VPValue *Op) const override {
3475 "Op must be an operand of the recipe");
3476 return true;
3477 }
3478
3479 /// Return the mask of a predicated VPReplicateRecipe.
3481 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3482 return getOperand(getNumOperands() - 1);
3483 }
3484
3485 /// Return the recipe's operands, excluding the mask of a predicated recipe.
3489
3490 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3491
3492protected:
3493#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3494 /// Print the recipe.
3495 void printRecipe(raw_ostream &O, const Twine &Indent,
3496 VPSlotTracker &SlotTracker) const override;
3497#endif
3498};
3499
3500/// A recipe for generating conditional branches on the bits of a mask.
3502public:
3504 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3505
3508 }
3509
3510 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3511
3512 /// Generate the extraction of the appropriate bit from the block mask and the
3513 /// conditional branch.
3514 void execute(VPTransformState &State) override;
3515
3516 /// Return the cost of this VPBranchOnMaskRecipe.
3517 InstructionCost computeCost(ElementCount VF,
3518 VPCostContext &Ctx) const override;
3519
3520#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3521 /// Print the recipe.
3522 void printRecipe(raw_ostream &O, const Twine &Indent,
3523 VPSlotTracker &SlotTracker) const override {
3524 O << Indent << "BRANCH-ON-MASK ";
3526 }
3527#endif
3528
3529 /// Returns true if the recipe uses scalars of operand \p Op.
3530 bool usesScalars(const VPValue *Op) const override {
3532 "Op must be an operand of the recipe");
3533 return true;
3534 }
3535};
3536
3537/// A recipe to combine multiple recipes into a single 'expression' recipe,
3538/// which should be considered a single entity for cost-modeling and transforms.
3539/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3540/// expression recipes, before execute. The individual expression recipes are
3541/// completely disconnected from the def-use graph of other recipes not part of
3542/// the expression. Def-use edges between pairs of expression recipes remain
3543/// intact, whereas every edge between an expression recipe and a recipe outside
3544/// the expression is elevated to connect the non-expression recipe with the
3545/// VPExpressionRecipe itself.
3546class VPExpressionRecipe : public VPSingleDefRecipe {
3547 /// Recipes included in this VPExpressionRecipe. This could contain
3548 /// duplicates.
3549 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3550
3551 /// Temporary VPValues used for external operands of the expression, i.e.
3552 /// operands not defined by recipes in the expression.
3553 SmallVector<VPValue *> LiveInPlaceholders;
3554
/// The kinds of expression a VPExpressionRecipe can represent. All of them
/// are in-loop reductions whose result is added to a chain.
enum class ExpressionTypes {
  /// Extended reduction: reduce an extended vector operand to a scalar and
  /// add the result to a chain.
  ExtendedReduction,
  /// Extended reduction whose extended operand is negated before being
  /// reduced and added to a chain.
  NegatedExtendedReduction,
  /// Multiply-accumulate reduction over extended operands: multiply the
  /// extended vector operands, reduction.add the product, and add the scalar
  /// result to a chain.
  ExtMulAccReduction,
  /// Multiply-accumulate reduction: multiply the vector operands,
  /// reduction.add the product, and add the scalar result to a chain.
  MulAccReduction,
  /// Multiply-accumulate reduction over extended operands with a negated
  /// product: multiply the extended vector operands, negate the product,
  /// reduction.add it, and add the scalar result to a chain.
  ExtNegatedMulAccReduction,
};
3576
3577 /// Type of the expression.
3578 ExpressionTypes ExpressionType;
3579
3580 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3581 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3582 /// in the expression) are replaced by temporary VPValues and the original
3583 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3584 /// as needed (excluding last) to ensure they are only used by other recipes
3585 /// in the expression.
3586 VPExpressionRecipe(ExpressionTypes ExpressionType,
3587 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3588
3589public:
3591 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3593 VPReductionRecipe *Red)
3594 : VPExpressionRecipe(ExpressionTypes::NegatedExtendedReduction,
3595 {Ext, Neg, Red}) {
3596 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3597 Red->getRecurrenceKind() == RecurKind::FAdd) &&
3598 "Expected an add reduction");
3599 if (Neg->getOpcode() == Instruction::Sub) {
3600 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(1));
3601 assert(SubConst && SubConst->isZero() && "Expected a negating sub");
3602 } else
3603 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3604 }
3606 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3609 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3610 {Ext0, Ext1, Mul, Red}) {}
3613 VPReductionRecipe *Red)
3614 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3615 {Ext0, Ext1, Mul, Neg, Red}) {
3616 assert((Mul->getOpcode() == Instruction::Mul ||
3617 Mul->getOpcode() == Instruction::FMul) &&
3618 "Expected a mul");
3619 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3620 Red->getRecurrenceKind() == RecurKind::FAdd) &&
3621 "Expected an add reduction");
3622 assert(getNumOperands() >= 3 && "Expected at least three operands");
3623 if (Neg->getOpcode() == Instruction::Sub) {
3624 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3625 assert(SubConst && SubConst->isZero() &&
3626 Neg->getOpcode() == Instruction::Sub && "Expected a negating sub");
3627 } else
3628 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3629 }
3630
3632 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3633 for (auto *R : reverse(ExpressionRecipes)) {
3634 if (ExpressionRecipesSeen.insert(R).second)
3635 delete R;
3636 }
3637 for (VPValue *T : LiveInPlaceholders)
3638 delete T;
3639 }
3640
3641 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3642
3643 VPExpressionRecipe *clone() override {
3644 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3645 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3646 for (auto *R : ExpressionRecipes)
3647 NewExpressiondRecipes.push_back(R->clone());
3648 for (auto *New : NewExpressiondRecipes) {
3649 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3650 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3651 // Update placeholder operands in the cloned recipe to use the external
3652 // operands, to be internalized when the cloned expression is constructed.
3653 for (const auto &[Placeholder, OutsideOp] :
3654 zip(LiveInPlaceholders, operands()))
3655 New->replaceUsesOfWith(Placeholder, OutsideOp);
3656 }
3657 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3658 }
3659
3660 /// Return the VPValue to use to infer the result type of the recipe.
3662 unsigned OpIdx =
3663 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3664 : 1;
3665 return getOperand(getNumOperands() - OpIdx);
3666 }
3667
3668 /// Insert the recipes of the expression back into the VPlan, directly before
3669 /// the current recipe. Leaves the expression recipe empty, which must be
3670 /// removed before codegen.
3671 void decompose();
3672
3673 unsigned getVFScaleFactor() const {
3674 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3675 return PR ? PR->getVFScaleFactor() : 1;
3676 }
3677
3678 /// Method for generating code, must not be called as this recipe is abstract.
3679 void execute(VPTransformState &State) override {
3680 llvm_unreachable("recipe must be removed before execute");
3681 }
3682
3684 VPCostContext &Ctx) const override;
3685
3686 /// Returns true if this expression contains recipes that may read from or
3687 /// write to memory.
3688 bool mayReadOrWriteMemory() const;
3689
3690 /// Returns true if this expression contains recipes that may have side
3691 /// effects.
3692 bool mayHaveSideEffects() const;
3693
3694 /// Returns true if this VPExpressionRecipe produces a single scalar.
3695 bool isVectorToScalar() const;
3696
3697protected:
3698#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3699 /// Print the recipe.
3700 void printRecipe(raw_ostream &O, const Twine &Indent,
3701 VPSlotTracker &SlotTracker) const override;
3702#endif
3703};
3704
3705/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3706/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3707/// order to merge values that are set under such a branch and feed their uses.
3708/// The phi nodes can be scalar or vector depending on the users of the value.
3709/// This recipe works in concert with VPBranchOnMaskRecipe.
3711public:
3712 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3713 /// nodes after merging back from a Branch-on-Mask.
3715 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV,
3716 PredV->getScalarType(), /*UV=*/nullptr, DL) {}
3717 ~VPPredInstPHIRecipe() override = default;
3718
3720 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3721 }
3722
3723 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3724
3725 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3726 /// retain SSA form.
3727 void execute(VPTransformState &State) override;
3728
3729 /// Return the cost of this VPPredInstPHIRecipe.
3731 VPCostContext &Ctx) const override {
3732 // TODO: Compute accurate cost after retiring the legacy cost model.
3733 return 0;
3734 }
3735
3736protected:
3737#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3738 /// Print the recipe.
3739 void printRecipe(raw_ostream &O, const Twine &Indent,
3740 VPSlotTracker &SlotTracker) const override;
3741#endif
3742};
3743
3744/// A common mixin class for widening memory operations. An optional mask can be
3745/// provided as the last operand.
3747protected:
3749
3750 /// Alignment information for this memory access.
3752
3753 /// Whether the accessed addresses are consecutive.
3755
3756 /// Whether the memory access is masked.
3757 bool IsMasked = false;
3758
3759 void setMask(VPValue *Mask) {
3760 assert(!IsMasked && "cannot re-set mask");
3761 if (!Mask)
3762 return;
3763 assert(Mask->getScalarType()->isIntegerTy(1) &&
3764 "Mask must be an i1 (vector)");
3765 getAsRecipe()->addOperand(Mask);
3766 IsMasked = true;
3767 }
3768
3773
3774public:
3775 virtual ~VPWidenMemoryRecipe() = default;
3776
3777 /// Return a VPRecipeBase* to the current object.
3779 virtual const VPRecipeBase *getAsRecipe() const = 0;
3780
3781 /// Return whether the loaded-from / stored-to addresses are consecutive.
3782 bool isConsecutive() const { return Consecutive; }
3783
3784 /// Return the address accessed by this recipe.
3785 VPValue *getAddr() const { return getAsRecipe()->getOperand(0); }
3786
3787 /// Returns true if the recipe is masked.
3788 bool isMasked() const { return IsMasked; }
3789
3790 /// Return the mask used by this recipe. Note that a full mask is represented
3791 /// by a nullptr.
3792 VPValue *getMask() const {
3793 // Mask is optional and therefore the last operand.
3794 const VPRecipeBase *R = getAsRecipe();
3795 return isMasked() ? R->getOperand(R->getNumOperands() - 1) : nullptr;
3796 }
3797
3798 /// Returns the alignment of the memory access.
3799 Align getAlign() const { return Alignment; }
3800
3801 /// Return the cost of this VPWidenMemoryRecipe.
3802 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3803
3805};
3806
3807/// A recipe for widening load operations, using the address to load from and an
3808/// optional mask.
3810 public VPWidenMemoryRecipe {
3812 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3813 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, Load.getType(),
3814 &Load, DL),
3815 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3816 setMask(Mask);
3817 }
3818
3821 getMask(), Consecutive, *this, getDebugLoc());
3822 }
3823
3824 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3825
3826 /// Generate a wide load or gather.
3827 void execute(VPTransformState &State) override;
3828
3829 /// Return the cost of this VPWidenLoadRecipe.
3831 VPCostContext &Ctx) const override {
3832 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3833 }
3834
3835 /// Returns true if the recipe only uses the first lane of operand \p Op.
3836 bool usesFirstLaneOnly(const VPValue *Op) const override {
3838 "Op must be an operand of the recipe");
3839 // Widened, consecutive load operations only demand the first lane of
3840 // their address.
3841 return Op == getAddr() && isConsecutive();
3842 }
3843
3844protected:
3845 VPRecipeBase *getAsRecipe() override;
3846 const VPRecipeBase *getAsRecipe() const override;
3847
3848#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3849 /// Print the recipe.
3850 void printRecipe(raw_ostream &O, const Twine &Indent,
3851 VPSlotTracker &SlotTracker) const override;
3852#endif
3853};
3854
3855/// A recipe for widening load operations with vector-predication intrinsics,
3856/// using the address to load from, the explicit vector length and an optional
3857/// mask.
3859 : public VPSingleDefRecipe,
3860 public VPWidenMemoryRecipe {
3862 VPValue *Mask)
3863 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3864 L.getIngredient().getType(), &L.getIngredient(),
3865 L.getDebugLoc()),
3866 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3867 setMask(Mask);
3868 }
3869
3871 llvm_unreachable("cloning not supported");
3872 }
3873
3874 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3875
3876 /// Return the EVL operand.
3877 VPValue *getEVL() const { return getOperand(1); }
3878
3879 /// Generate the wide load or gather.
3880 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3881
3882 /// Return the cost of this VPWidenLoadEVLRecipe.
3884 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3885
3886 /// Returns true if the recipe only uses the first lane of operand \p Op.
3887 bool usesFirstLaneOnly(const VPValue *Op) const override {
3889 "Op must be an operand of the recipe");
3890 // Widened loads only demand the first lane of EVL and consecutive loads
3891 // only demand the first lane of their address.
3892 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3893 }
3894
3895protected:
3896 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
3897 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
3898
3899#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3900 /// Print the recipe.
3901 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3902 VPSlotTracker &SlotTracker) const override;
3903#endif
3904};
3905
3906/// A recipe for widening store operations, using the stored value, the address
3907/// to store to and an optional mask.
3909 public VPWidenMemoryRecipe {
3910 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3911 VPValue *Mask, bool Consecutive,
3912 const VPIRMetadata &Metadata, DebugLoc DL)
3913 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3914 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3915 setMask(Mask);
3916 }
3917
3921 *this, getDebugLoc());
3922 }
3923
3924 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3925
3926 /// Return the value stored by this recipe.
3927 VPValue *getStoredValue() const { return getOperand(1); }
3928
3929 /// Generate a wide store or scatter.
3930 void execute(VPTransformState &State) override;
3931
3932 /// Return the cost of this VPWidenStoreRecipe.
3934 VPCostContext &Ctx) const override {
3935 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3936 }
3937
3938 /// Returns true if the recipe only uses the first lane of operand \p Op.
3939 bool usesFirstLaneOnly(const VPValue *Op) const override {
3941 "Op must be an operand of the recipe");
3942 // Widened, consecutive stores only demand the first lane of their address,
3943 // unless the same operand is also stored.
3944 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3945 }
3946
3947protected:
3948 VPRecipeBase *getAsRecipe() override;
3949 const VPRecipeBase *getAsRecipe() const override;
3950
3951#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3952 /// Print the recipe.
3953 void printRecipe(raw_ostream &O, const Twine &Indent,
3954 VPSlotTracker &SlotTracker) const override;
3955#endif
3956};
3957
3958/// A recipe for widening store operations with vector-predication intrinsics,
3959/// using the value to store, the address to store to, the explicit vector
3960/// length and an optional mask.
3962 : public VPRecipeBase,
3963 public VPWidenMemoryRecipe {
3965 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3966 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3967 S.getDebugLoc()),
3968 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3969 setMask(Mask);
3970 }
3971
3973 llvm_unreachable("cloning not supported");
3974 }
3975
3976 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3977
3978 /// Return the value stored by this recipe.
3979 VPValue *getStoredValue() const { return getOperand(1); }
3980
3981 /// Return the EVL operand.
3982 VPValue *getEVL() const { return getOperand(2); }
3983
3984 /// Generate the wide store or scatter.
3985 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3986
3987 /// Return the cost of this VPWidenStoreEVLRecipe.
3989 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3990
3991 /// Returns true if the recipe only uses the first lane of operand \p Op.
3992 bool usesFirstLaneOnly(const VPValue *Op) const override {
3994 "Op must be an operand of the recipe");
3995 if (Op == getEVL()) {
3996 assert(getStoredValue() != Op && "unexpected store of EVL");
3997 return true;
3998 }
3999 // Widened, consecutive memory operations only demand the first lane of
4000 // their address, unless the same operand is also stored. The latter can
4001 // happen with opaque pointers.
4002 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
4003 }
4004
4005protected:
4006 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
4007 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
4008
4009#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4010 /// Print the recipe.
4011 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4012 VPSlotTracker &SlotTracker) const override;
4013#endif
4014};
4015
4016/// Recipe to expand a SCEV expression.
4018 const SCEV *Expr;
4019
4020public:
4021 VPExpandSCEVRecipe(const SCEV *Expr);
4022
4023 ~VPExpandSCEVRecipe() override = default;
4024
4025 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
4026
4027 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
4028
4029 void execute(VPTransformState &State) override {
4030 llvm_unreachable("SCEV expressions must be expanded before final execute");
4031 }
4032
4033 /// Return the cost of this VPExpandSCEVRecipe.
4035 VPCostContext &Ctx) const override {
4036 // TODO: Compute accurate cost after retiring the legacy cost model.
4037 return 0;
4038 }
4039
4040 const SCEV *getSCEV() const { return Expr; }
4041
4042protected:
4043#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4044 /// Print the recipe.
4045 void printRecipe(raw_ostream &O, const Twine &Indent,
4046 VPSlotTracker &SlotTracker) const override;
4047#endif
4048};
4049
4050/// A recipe for generating the active lane mask for the vector loop that is
4051/// used to predicate the vector operations.
4053public:
4055 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
4056 StartMask, DL) {}
4057
4058 ~VPActiveLaneMaskPHIRecipe() override = default;
4059
4062 if (getNumOperands() == 2)
4063 R->addBackedgeValue(getOperand(1));
4064 return R;
4065 }
4066
4067 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
4068
4069 /// Generate the active lane mask phi of the vector loop.
4070 void execute(VPTransformState &State) override;
4071
4072protected:
4073#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4074 /// Print the recipe.
4075 void printRecipe(raw_ostream &O, const Twine &Indent,
4076 VPSlotTracker &SlotTracker) const override;
4077#endif
4078};
4079
4080/// A recipe for generating the phi node tracking the current scalar iteration
4081/// index. It starts at the start value of the canonical induction and gets
4082/// incremented by the number of scalar iterations processed by the vector loop
4083/// iteration. The increment does not have to be loop invariant.
4085public:
4087 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
4088 StartIV, DL) {}
4089
4090 ~VPCurrentIterationPHIRecipe() override = default;
4091
4093 llvm_unreachable("cloning not implemented yet");
4094 }
4095
4096 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
4097
4098 void execute(VPTransformState &State) override {
4099 llvm_unreachable("cannot execute this recipe, should be replaced by a "
4100 "scalar phi recipe");
4101 }
4102
4103 /// Return the cost of this VPCurrentIterationPHIRecipe.
4105 VPCostContext &Ctx) const override {
4106 // For now, match the behavior of the legacy cost model.
4107 return 0;
4108 }
4109
4110 /// Returns true if the recipe only uses the first lane of operand \p Op.
4111 bool usesFirstLaneOnly(const VPValue *Op) const override {
4113 "Op must be an operand of the recipe");
4114 return true;
4115 }
4116
4117protected:
4118#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4119 /// Print the recipe.
4120 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4121 VPSlotTracker &SlotTracker) const override;
4122#endif
4123};
4124
4125/// A Recipe for widening the canonical induction variable of the vector loop.
4126/// First operand is the canonical IV recipe, a second step operand (VF * Part)
4127/// is added during unrolling.
4129public:
4131 const VPIRFlags::WrapFlagsTy &Flags = {false, false})
4132 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCanonicalIVSC, CanonicalIV,
4133 CanonicalIV->getType(), Flags) {}
4134
4135 ~VPWidenCanonicalIVRecipe() override = default;
4136
4138 auto *WideCanIV =
4140 if (VPValue *Step = getStepValue())
4141 WideCanIV->addPerPartStep(Step);
4142 return WideCanIV;
4143 }
4144
4145 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
4146
4147 void execute(VPTransformState &State) override {
4148 llvm_unreachable("Expected prior expansion of WidenCanonicalIV recipes");
4149 }
4150
4151 /// Return the cost of this VPWidenCanonicalIVRecipe.
4153 VPCostContext &Ctx) const override {
4154 // TODO: Compute accurate cost after retiring the legacy cost model.
4155 return 0;
4156 }
4157
4158 /// Return the canonical IV being widened.
4162
4164 return getNumOperands() == 2 ? getOperand(1) : nullptr;
4165 }
4166
4167 /// Add the per-part step (VF * Part) used for unrolled parts.
4169 assert(Step->getScalarType() == getScalarType() &&
4170 "per-part step must have the same type as the canonical IV");
4171 VPUser::addOperand(Step);
4172 }
4173
4174protected:
4175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4176 /// Print the recipe.
4177 void printRecipe(raw_ostream &O, const Twine &Indent,
4178 VPSlotTracker &SlotTracker) const override;
4179#endif
4180};
4181
4182/// A recipe for converting the input value \p IV to the corresponding
4183/// value of an IV with different start and step values, using Start + IV *
4184/// Step.
4186 /// Kind of the induction.
4188 /// If not nullptr, the floating point induction binary operator. Must be set
4189 /// for floating point inductions.
4190 const FPMathOperator *FPBinOp;
4191
4192public:
4194 VPValue *CanonicalIV, VPValue *Step)
4196 IndDesc.getKind(),
4197 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
4198 Start, CanonicalIV, Step) {}
4199
4201 const FPMathOperator *FPBinOp, VPIRValue *Start,
4202 VPValue *IV, VPValue *Step)
4203 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step},
4204 Start->getScalarType(), nullptr),
4205 Kind(Kind), FPBinOp(FPBinOp) {}
4206
4207 ~VPDerivedIVRecipe() override = default;
4208
4210 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
4211 getStepValue());
4212 }
4213
4214 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4215
4216 void execute(VPTransformState &State) override {
4217 llvm_unreachable("Expected prior expansion of this recipe");
4218 }
4219
4220 /// Return the cost of this VPDerivedIVRecipe.
4222 VPCostContext &Ctx) const override;
4223
4225 VPValue *getIndex() const { return getOperand(1); }
4226 VPValue *getStepValue() const { return getOperand(2); }
4227 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
4229
4230 /// Returns true if the recipe only uses the first lane of operand \p Op.
4231 bool usesFirstLaneOnly(const VPValue *Op) const override {
4233 "Op must be an operand of the recipe");
4234 return true;
4235 }
4236
4237protected:
4238#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4239 /// Print the recipe.
4240 void printRecipe(raw_ostream &O, const Twine &Indent,
4241 VPSlotTracker &SlotTracker) const override;
4242#endif
4243};
4244
4245/// A recipe for handling phi nodes of integer and floating-point inductions,
4246/// producing their scalar values. Before unrolling by UF the recipe represents
4247/// the VF*UF scalar values to be produced, or UF scalar values if only first
4248/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4249/// operand StartIndex to all unroll parts except part 0, as the recipe
4250/// represents the VF scalar values (this number of values is taken from
4251/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4253 Instruction::BinaryOps InductionOpcode;
4254
4255public:
4258 DebugLoc DL)
4259 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4260 IV->getScalarType(), FMFs, DL),
4261 InductionOpcode(Opcode) {}
4262
4264 VPValue *Step, VPValue *VF,
4267 IV, Step, VF, IndDesc.getInductionOpcode(),
4268 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4269 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4270 : FastMathFlags(),
4271 DL) {}
4272
4273 ~VPScalarIVStepsRecipe() override = default;
4274
4276 auto *NewR = new VPScalarIVStepsRecipe(
4277 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
4279 if (VPValue *StartIndex = getStartIndex())
4280 NewR->setStartIndex(StartIndex);
4281 return NewR;
4282 }
4283
4284 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4285
4286 /// Generate the scalarized versions of the phi node as needed by their users.
4287 void execute(VPTransformState &State) override;
4288
4289 /// Return the cost of this VPScalarIVStepsRecipe.
4291 VPCostContext &Ctx) const override {
4292 // TODO: Compute accurate cost after retiring the legacy cost model.
4293 return 0;
4294 }
4295
4296 VPValue *getStepValue() const { return getOperand(1); }
4297
4298 /// Return the number of scalars to produce per unroll part, used to compute
4299 /// StartIndex during unrolling.
4300 VPValue *getVFValue() const { return getOperand(2); }
4301
4302 /// Return the StartIndex, or null if known to be zero, valid only after
4303 /// unrolling.
4305 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4306 }
4307
4308 /// Set or add the StartIndex operand.
4309 void setStartIndex(VPValue *StartIndex) {
4310 if (getNumOperands() == 4)
4311 setOperand(3, StartIndex);
4312 else
4313 addOperand(StartIndex);
4314 }
4315
4316 /// Returns true if the recipe only uses the first lane of operand \p Op.
4317 bool usesFirstLaneOnly(const VPValue *Op) const override {
4319 "Op must be an operand of the recipe");
4320 return true;
4321 }
4322
4323 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4324
4325protected:
4326#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4327 /// Print the recipe.
4328 void printRecipe(raw_ostream &O, const Twine &Indent,
4329 VPSlotTracker &SlotTracker) const override;
4330#endif
4331};
4332
4333/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4334/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4335/// VPIRMetadata).
4336namespace vpdetail {
4337template <typename VPMixin, typename... RecipeTys>
4339 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4340 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4341 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4342 "Each type in RecipeTys must derive from VPMixin");
4343
4344 /// Used by isa.
4345 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4346
4347 /// Used by cast.
4348 static VPMixin *doCast(VPRecipeBase *R) {
4349 VPMixin *Out = nullptr;
4350 ((Out = dyn_cast<RecipeTys>(R)) || ...);
4351 assert(Out && "Illegal recipe for cast");
4352 return Out;
4353 }
4354 static VPMixin *castFailed() { return nullptr; }
4355};
4356} // namespace vpdetail
4357
4358/// Support casting from VPRecipeBase -> VPPhiAccessors.
4359template <>
4363
4364template <>
4369template <>
4371 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4372 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4373
4374/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4375template <>
4380template <>
4385
4386/// Support casting from VPRecipeBase -> VPIRMetadata.
4387template <>
4393
4394template <>
4399template <>
4401 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4402 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4403
4404/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4405/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4406/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4407class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4408 friend class VPlan;
4409
4410 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4411 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4412 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4413 if (Recipe)
4414 appendRecipe(Recipe);
4415 }
4416
4417public:
4419
4420protected:
4421 /// The VPRecipes held in the order of output instructions to generate.
4423
4424 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4425 : VPBlockBase(BlockSC, Name.str()) {}
4426
4427public:
4428 ~VPBasicBlock() override {
4429 while (!Recipes.empty())
4430 Recipes.pop_back();
4431 }
4432
4433 /// Instruction iterators...
4438
4439 //===--------------------------------------------------------------------===//
4440 /// Recipe iterator methods
4441 ///
4442 inline iterator begin() { return Recipes.begin(); }
4443 inline const_iterator begin() const { return Recipes.begin(); }
4444 inline iterator end() { return Recipes.end(); }
4445 inline const_iterator end() const { return Recipes.end(); }
4446
4447 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4448 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4449 inline reverse_iterator rend() { return Recipes.rend(); }
4450 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4451
4452 inline size_t size() const { return Recipes.size(); }
4453 inline bool empty() const { return Recipes.empty(); }
4454 inline const VPRecipeBase &front() const { return Recipes.front(); }
4455 inline VPRecipeBase &front() { return Recipes.front(); }
4456 inline const VPRecipeBase &back() const { return Recipes.back(); }
4457 inline VPRecipeBase &back() { return Recipes.back(); }
4458
4459 /// Returns a reference to the list of recipes.
4461
4462 /// Returns a pointer to a member of the recipe list.
4463 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4464 return &VPBasicBlock::Recipes;
4465 }
4466
4467 /// Method to support type inquiry through isa, cast, and dyn_cast.
4468 static inline bool classof(const VPBlockBase *V) {
4469 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4470 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4471 }
4472
4473 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4474 assert(Recipe && "No recipe to append.");
4475 assert(!Recipe->Parent && "Recipe already in VPlan");
4476 Recipe->Parent = this;
4477 Recipes.insert(InsertPt, Recipe);
4478 }
4479
4480 /// Augment the existing recipes of a VPBasicBlock with an additional
4481 /// \p Recipe as the last recipe.
4482 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4483
4484 /// The method which generates the output IR instructions that correspond to
4485 /// this VPBasicBlock, thereby "executing" the VPlan.
4486 void execute(VPTransformState *State) override;
4487
4488 /// Return the cost of this VPBasicBlock.
4489 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4490
4491 /// Return the position of the first non-phi node recipe in the block.
4492 iterator getFirstNonPhi();
4493
4494 /// Returns an iterator range over the PHI-like recipes in the block.
4498
4499 /// Split current block at \p SplitAt by inserting a new block between the
4500 /// current block and its successors and moving all recipes starting at
4501 /// SplitAt to the new block. Returns the new block.
4502 VPBasicBlock *splitAt(iterator SplitAt);
4503
4504 VPRegionBlock *getEnclosingLoopRegion();
4505 const VPRegionBlock *getEnclosingLoopRegion() const;
4506
4507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4508 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4509 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4510 ///
4511 /// Note that the numbering is applied to the whole VPlan, so printing
4512 /// individual blocks is consistent with the whole VPlan printing.
4513 void print(raw_ostream &O, const Twine &Indent,
4514 VPSlotTracker &SlotTracker) const override;
4515 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4516#endif
4517
4518 /// If the block has multiple successors, return the branch recipe terminating
4519 /// the block. If there are no or only a single successor, return nullptr;
4520 VPRecipeBase *getTerminator();
4521 const VPRecipeBase *getTerminator() const;
4522
4523 /// Returns true if the block is exiting its parent region.
4524 bool isExiting() const;
4525
4526 /// Clone the current block and its recipes, without updating the operands of
4527 /// the cloned recipes.
4528 VPBasicBlock *clone() override;
4529
4530 /// Returns the predecessor block at index \p Idx with the predecessors as per
4531 /// the corresponding plain CFG. If the block is an entry block to a region,
4532 /// the first predecessor is the single predecessor of a region, and the
4533 /// second predecessor is the exiting block of the region.
4534 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4535
4536protected:
4537 /// Execute the recipes in the IR basic block \p BB.
4538 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4539
4540 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4541 /// generated for this VPBB.
4542 void connectToPredecessors(VPTransformState &State);
4543
4544private:
4545 /// Create an IR BasicBlock to hold the output instructions generated by this
4546 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4547 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4548};
4549
4550inline const VPBasicBlock *
4552 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4553}
4554
4555/// A special type of VPBasicBlock that wraps an existing IR basic block.
4556/// Recipes of the block get added before the first non-phi instruction in the
4557/// wrapped block.
4558/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4559/// preheader block.
4560class VPIRBasicBlock : public VPBasicBlock {
4561 friend class VPlan;
4562
4563 BasicBlock *IRBB;
4564
4565 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4566 VPIRBasicBlock(BasicBlock *IRBB)
4567 : VPBasicBlock(VPIRBasicBlockSC,
4568 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4569 IRBB(IRBB) {}
4570
4571public:
4572 ~VPIRBasicBlock() override = default;
4573
4574 static inline bool classof(const VPBlockBase *V) {
4575 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4576 }
4577
4578 /// The method which generates the output IR instructions that correspond to
4579 /// this VPBasicBlock, thereby "executing" the VPlan.
4580 void execute(VPTransformState *State) override;
4581
4582 VPIRBasicBlock *clone() override;
4583
4584 BasicBlock *getIRBasicBlock() const { return IRBB; }
4585};
4586
4587/// Track information about the canonical IV value of a region.
4588/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4590 /// VPRegionValue for the canonical IV, whose allocation is managed by
4591 /// VPCanonicalIVInfo.
4592 std::unique_ptr<VPRegionValue> CanIV;
4593
4594 /// Whether the increment of the canonical IV has the no-unsigned-wrap (NUW) flag set.
4595 bool HasNUW = true;
4596
4597public:
4599 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4600
4601 VPRegionValue *getRegionValue() { return CanIV.get(); }
4602 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4603
4604 bool hasNUW() const { return HasNUW; }
4605
4606 void clearNUW() { HasNUW = false; }
4607};
4608
4609/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4610/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4611/// A VPRegionBlock may indicate that its contents are to be replicated several
4612/// times. This is designed to support predicated scalarization, in which a
4613/// scalar if-then code structure needs to be generated VF * UF times. Having
4614/// this replication indicator helps to keep a single model for multiple
4615/// candidate VF's. The actual replication takes place only once the desired VF
4616/// and UF have been determined.
4617class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4618 friend class VPlan;
4619
4620 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4621 VPBlockBase *Entry;
4622
4623 /// Hold the Single Exiting block of the SESE region modelled by the
4624 /// VPRegionBlock.
4625 VPBlockBase *Exiting;
4626
4627 /// Holds the Canonical IV of the loop region along with additional
4628 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4629 /// Loop regions retain their canonical IVs until they are dissolved, even if
4630 /// the canonical IV has no users.
4631 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4632
4633 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4634 /// VPRegionBlocks.
4635 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4636 const std::string &Name = "")
4637 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4638 if (Entry) {
4639 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4640 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4641 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4642 Entry->setParent(this);
4643 Exiting->setParent(this);
4644 }
4645 }
4646
4647 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4648 VPBlockBase *Exiting, const std::string &Name = "")
4649 : VPRegionBlock(Entry, Exiting, Name) {
4650 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4651 }
4652
4653public:
4654 ~VPRegionBlock() override = default;
4655
4656 /// Method to support type inquiry through isa, cast, and dyn_cast.
4657 static inline bool classof(const VPBlockBase *V) {
4658 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4659 }
4660
4661 const VPBlockBase *getEntry() const { return Entry; }
4662 VPBlockBase *getEntry() { return Entry; }
4663
4664 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4665 /// EntryBlock must have no predecessors.
4666 void setEntry(VPBlockBase *EntryBlock) {
4667 assert(!EntryBlock->hasPredecessors() &&
4668 "Entry block cannot have predecessors.");
4669 Entry = EntryBlock;
4670 EntryBlock->setParent(this);
4671 }
4672
4673 const VPBlockBase *getExiting() const { return Exiting; }
4674 VPBlockBase *getExiting() { return Exiting; }
4675
4676 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4677 /// ExitingBlock must have no successors.
4678 void setExiting(VPBlockBase *ExitingBlock) {
4679 assert(!ExitingBlock->hasSuccessors() &&
4680 "Exit block cannot have successors.");
4681 Exiting = ExitingBlock;
4682 ExitingBlock->setParent(this);
4683 }
4684
4685 /// Returns the pre-header VPBasicBlock of the loop region.
4687 assert(!isReplicator() && "should only get pre-header of loop regions");
4688 return getSinglePredecessor()->getExitingBasicBlock();
4689 }
4690
4691 /// An indicator whether this region is to generate multiple replicated
4692 /// instances of output IR corresponding to its VPBlockBases.
4693 bool isReplicator() const { return !CanIVInfo; }
4694
4695 /// The method which generates the output IR instructions that correspond to
4696 /// this VPRegionBlock, thereby "executing" the VPlan.
4697 void execute(VPTransformState *State) override;
4698
4699 /// Return the cost of this region.
4700 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4701
4702#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4703 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4704 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4705 /// consecutive numbers.
4706 ///
4707 /// Note that the numbering is applied to the whole VPlan, so printing
4708 /// individual regions is consistent with the whole VPlan printing.
4709 void print(raw_ostream &O, const Twine &Indent,
4710 VPSlotTracker &SlotTracker) const override;
4711 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4712#endif
4713
4714 /// Clone all blocks in the single-entry single-exit region of the block and
4715 /// their recipes without updating the operands of the cloned recipes.
4716 VPRegionBlock *clone() override;
4717
4718 /// Remove the current region from its VPlan, connecting its predecessor to
4719 /// its entry, and its exiting block to its successor.
4720 void dissolveToCFGLoop();
4721
4722 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4723 /// a new increment before the terminator and return it. The canonical IV
4724 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4725 VPInstruction *getOrCreateCanonicalIVIncrement();
4726
4727 /// Return the canonical induction variable of the region, null for
4728 /// replicating regions.
4730 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4731 }
4733 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4734 }
4735
4736 /// Return the type of the canonical IV for loop regions.
4738 return CanIVInfo->getRegionValue()->getType();
4739 }
4740
4741 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4742 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4743
4744 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4746 assert(Increment && "Must provide increment to clear");
4747 Increment->dropPoisonGeneratingFlags();
4748 CanIVInfo->clearNUW();
4749 }
4750};
4751
4753 return getParent()->getParent();
4754}
4755
4757 return getParent()->getParent();
4758}
4759
4760 /// VPlan models a candidate for vectorization, encoding various decisions taken
4761/// to produce efficient output IR, including which branches, basic-blocks and
4762/// output IR instructions to generate, and their cost. VPlan holds a
4763/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4764/// VPBasicBlock.
4765class VPlan {
4766 friend class VPlanPrinter;
4767 friend class VPSlotTracker;
4768
4769 /// VPBasicBlock corresponding to the original preheader. Used to place
4770 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4771 /// rest of VPlan execution.
4772 /// When this VPlan is used for the epilogue vector loop, the entry will be
4773 /// replaced by a new entry block created during skeleton creation.
4774 VPBasicBlock *Entry;
4775
4776 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4777 VPIRBasicBlock *ScalarHeader;
4778
4779 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4780 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4781 /// e.g. if the scalar epilogue always executes.
4783
4784 /// Holds the VFs applicable to this VPlan.
4786
4787 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4788 /// any UF.
4790
4791 /// Holds the name of the VPlan, for printing.
4792 std::string Name;
4793
4794 /// Represents the trip count of the original loop, for folding
4795 /// the tail.
4796 VPValue *TripCount = nullptr;
4797
4798 /// Represents the backedge taken count of the original loop, for folding
4799 /// the tail. It equals TripCount - 1.
4800 VPSymbolicValue *BackedgeTakenCount = nullptr;
4801
4802 /// Represents the vector trip count.
4803 VPSymbolicValue VectorTripCount;
4804
4805 /// Represents the vectorization factor of the loop.
4806 VPSymbolicValue VF;
4807
4808 /// Represents the unroll factor of the loop.
4809 VPSymbolicValue UF;
4810
4811 /// Represents the loop-invariant VF * UF of the vector loop region.
4812 VPSymbolicValue VFxUF;
4813
4814 /// Contains all the external definitions created for this VPlan, as a mapping
4815 /// from IR Values to VPIRValues.
4817
4818 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4819 /// VPlan is destroyed.
4820 SmallVector<VPBlockBase *> CreatedBlocks;
4821
4822 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4823 /// wrapping the original header of the scalar loop. The vector loop will have
4824 /// index type \p IdxTy.
4825 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4826 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4827 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4828 Entry->setPlan(this);
4829 assert(ScalarHeader->getNumSuccessors() == 0 &&
4830 "scalar header must be a leaf node");
4831 }
4832
4833public:
4834 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4835 /// original preheader and scalar header of \p L, to be used as entry and
4836 /// scalar header blocks of the new VPlan. The vector loop will have index
4837 /// type \p IdxTy.
4838 VPlan(Loop *L, Type *IdxTy);
4839
4840 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4841 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4842 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4843 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4844 setEntry(createVPBasicBlock("preheader"));
4845 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4846 }
4847
4849
4851 Entry = VPBB;
4852 VPBB->setPlan(this);
4853 }
4854
4855 /// Generate the IR code for this VPlan.
4856 void execute(VPTransformState *State);
4857
4858 /// Return the cost of this plan.
4860
4861 VPBasicBlock *getEntry() { return Entry; }
4862 const VPBasicBlock *getEntry() const { return Entry; }
4863
4864 /// Returns the preheader of the vector loop region, if one exists, or null
4865 /// otherwise.
4867 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4868 return VectorRegion
4869 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4870 : nullptr;
4871 }
4872
4873 /// Returns the VPRegionBlock of the vector loop.
4876
4877 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4878 /// loop region contains a nested loop region.
4879 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4880
4881 /// Returns the 'middle' block of the plan, that is the block that selects
4882 /// whether to execute the scalar tail loop or the exit block from the loop
4883 /// latch. If there is an early exit from the vector loop, the middle block
4884 /// conceptually has the early exit block as third successor, split across 2
4885 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4886 /// tail loop or the exit block. If the scalar tail loop or exit block are
4887 /// known to always execute, the middle block may branch directly to that
4888 /// block. This function cannot be called once the vector loop region has been
4889 /// removed.
4891 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4892 assert(
4893 LoopRegion &&
4894 "cannot call the function after vector loop region has been removed");
4895 // The middle block is always the last successor of the region.
4896 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4897 }
4898
4900 return const_cast<VPlan *>(this)->getMiddleBlock();
4901 }
4902
4903 /// Return the VPBasicBlock for the preheader of the scalar loop.
4906 getScalarHeader()->getSinglePredecessor());
4907 }
4908
4909 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4910 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4911
4912 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4913 /// the original scalar loop.
4914 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4915
4916 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4917 /// exit block.
4919
4920 /// Returns true if \p VPBB is an exit block.
4921 bool isExitBlock(VPBlockBase *VPBB);
4922
4923 /// The trip count of the original loop.
4925 assert(TripCount && "trip count needs to be set before accessing it");
4926 return TripCount;
4927 }
4928
4929 /// Set the trip count assuming it is currently null; if it is not - use
4930 /// resetTripCount().
4931 void setTripCount(VPValue *NewTripCount) {
4932 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4933 TripCount = NewTripCount;
4934 }
4935
4936 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4937 /// the original trip count have been replaced.
4938 void resetTripCount(VPValue *NewTripCount) {
4939 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4940 "TripCount must be set when resetting");
4941 TripCount = NewTripCount;
4942 }
4943
4944 /// The backedge taken count of the original loop.
4946 // BTC shares the canonical IV type with VectorTripCount.
4947 if (!BackedgeTakenCount)
4948 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4949 return BackedgeTakenCount;
4950 }
4951 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4952
4953 /// The vector trip count.
4954 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4955
4956 /// Returns the VF of the vector loop region.
4957 VPSymbolicValue &getVF() { return VF; };
4958 const VPSymbolicValue &getVF() const { return VF; };
4959
4960 /// Returns the UF of the vector loop region.
4961 VPSymbolicValue &getUF() { return UF; };
4962
4963 /// Returns VF * UF of the vector loop region.
4964 VPSymbolicValue &getVFxUF() { return VFxUF; }
4965
4968 }
4969
4970 const DataLayout &getDataLayout() const {
4972 }
4973
4974 void addVF(ElementCount VF) { VFs.insert(VF); }
4975
4977 assert(hasVF(VF) && "Cannot set VF not already in plan");
4978 VFs.clear();
4979 VFs.insert(VF);
4980 }
4981
4982 /// Remove \p VF from the plan.
4984 assert(hasVF(VF) && "tried to remove VF not present in plan");
4985 VFs.remove(VF);
4986 }
4987
4988 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4989 bool hasScalableVF() const {
4990 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4991 }
4992
4993 /// Returns an iterator range over all VFs of the plan.
4996 return VFs;
4997 }
4998
4999 /// Returns the single VF of the plan, asserting that the plan has exactly
5000 /// one VF.
5002 assert(VFs.size() == 1 && "expected plan with single VF");
5003 return VFs[0];
5004 }
5005
5006 bool hasScalarVFOnly() const {
5007 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
5008 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
5009 "Plan with scalar VF should only have a single VF");
5010 return HasScalarVFOnly;
5011 }
5012
5013 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
5014
5015 /// Returns the concrete UF of the plan, after unrolling.
5016 unsigned getConcreteUF() const {
5017 assert(UFs.size() == 1 && "Expected a single UF");
5018 return UFs[0];
5019 }
5020
5021 void setUF(unsigned UF) {
5022 assert(hasUF(UF) && "Cannot set the UF not already in plan");
5023 UFs.clear();
5024 UFs.insert(UF);
5025 }
5026
5027 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
5028 /// concrete UF.
5029 bool isUnrolled() const { return UFs.size() == 1; }
5030
5031 /// Return a string with the name of the plan and the applicable VFs and UFs.
5032 std::string getName() const;
5033
5034 void setName(const Twine &newName) { Name = newName.str(); }
5035
5036 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
5037 /// yet) for \p V.
5039 assert(V && "Trying to get or add the VPIRValue of a null Value");
5040 auto [It, Inserted] = LiveIns.try_emplace(V);
5041 if (Inserted) {
5042 if (auto *CI = dyn_cast<ConstantInt>(V))
5043 It->second = new VPConstantInt(CI);
5044 else
5045 It->second = new VPIRValue(V);
5046 }
5047
5048 assert(isa<VPIRValue>(It->second) &&
5049 "Only VPIRValues should be in mapping");
5050 return It->second;
5051 }
5053 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
5054 return getOrAddLiveIn(V->getValue());
5055 }
5056
5057 /// Return a VPIRValue wrapping i1 true.
5058 VPIRValue *getTrue() { return getConstantInt(1, 1); }
5059
5060 /// Return a VPIRValue wrapping i1 false.
5061 VPIRValue *getFalse() { return getConstantInt(1, 0); }
5062
5063 /// Return a VPIRValue wrapping the null value of type \p Ty.
5064 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
5065
5066 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
5068 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
5069 }
5070
5071 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
5072 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
5073 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
5074 }
5075
5076 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
5077 /// value.
5079 bool IsSigned = false) {
5080 return getConstantInt(APInt(BitWidth, Val, IsSigned));
5081 }
5082
5083 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
5085 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
5086 }
5087
5088 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
5089 /// otherwise.
5090 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
5091
5092 /// Return the list of live-in VPValues available in the VPlan.
5093 auto getLiveIns() const { return LiveIns.values(); }
5094
5095#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5096 /// Print the live-ins of this VPlan to \p O.
5097 void printLiveIns(raw_ostream &O) const;
5098
5099 /// Print this VPlan to \p O.
5100 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
5101
5102 /// Print this VPlan in DOT format to \p O.
5103 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
5104
5105 /// Dump the plan to stderr (for debugging).
5106 LLVM_DUMP_METHOD void dump() const;
5107#endif
5108
5109 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
5110 /// recipes to refer to the clones, and return it.
5112
5113 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
5114 /// present. The returned block is owned by the VPlan and deleted once the
5115 /// VPlan is destroyed.
5117 VPRecipeBase *Recipe = nullptr) {
5118 auto *VPB = new VPBasicBlock(Name, Recipe);
5119 CreatedBlocks.push_back(VPB);
5120 return VPB;
5121 }
5122
5123 /// Create a new loop region with a canonical IV using \p CanIVTy and
5124 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
5125 /// to \p Entry and \p Exiting respectively, if provided. The returned block
5126 /// is owned by the VPlan and deleted once the VPlan is destroyed.
5128 const std::string &Name = "",
5129 VPBlockBase *Entry = nullptr,
5130 VPBlockBase *Exiting = nullptr) {
5131 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
5132 CreatedBlocks.push_back(VPB);
5133 return VPB;
5134 }
5135
5136 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
5137 /// returned block is owned by the VPlan and deleted once the VPlan is
5138 /// destroyed.
5140 const std::string &Name = "") {
5141 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
5142 CreatedBlocks.push_back(VPB);
5143 return VPB;
5144 }
5145
5146 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
5147 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
5148 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
5150
5151 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
5152 /// instructions in \p IRBB, except its terminator which is managed by the
5153 /// successors of the block in VPlan. The returned block is owned by the VPlan
5154 /// and deleted once the VPlan is destroyed.
5156
5157 /// Returns true if the VPlan is based on a loop with an early exit. That is
5158 /// the case if the VPlan has either more than one exit block or a single exit
5159 /// block with multiple predecessors (one for the exit via the latch and one
5160 /// via the other early exit).
5161 bool hasEarlyExit() const {
5162 return count_if(ExitBlocks,
5163 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
5164 1 ||
5165 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
5166 }
5167
5168 /// Returns true if the scalar tail may execute after the vector loop, i.e.
5169 /// if the middle block is a predecessor of the scalar preheader. Note that
5170 /// this relies on unneeded branches to the scalar tail loop being removed.
5171 bool hasScalarTail() const {
5172 auto *ScalarPH = getScalarPreheader();
5173 return ScalarPH &&
5174 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
5175 }
5176
5177 /// The type of the canonical induction variable of the vector loop.
5178 Type *getIndexType() const { return VF.getType(); }
5179};
5180
5181#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5182inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
5183 Plan.print(OS);
5184 return OS;
5185}
5186#endif
5187
5188} // end namespace llvm
5189
5190#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
static Interval intersect(const Interval &I1, const Interval &I2)
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:585
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:151
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1069
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
This class represents an assumption made using SCEV expressions which can be checked at run-time.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4060
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:4054
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4407
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4435
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4482
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4437
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4434
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4460
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4418
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4424
iterator end()
Definition VPlan.h:4444
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4442
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4436
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4495
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:763
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
~VPBasicBlock() override
Definition VPlan.h:4428
const_reverse_iterator rbegin() const
Definition VPlan.h:4448
reverse_iterator rend()
Definition VPlan.h:4449
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4422
VPRecipeBase & back()
Definition VPlan.h:4457
const VPRecipeBase & front() const
Definition VPlan.h:4454
const_iterator begin() const
Definition VPlan.h:4443
VPRecipeBase & front()
Definition VPlan.h:4455
const VPRecipeBase & back() const
Definition VPlan.h:4456
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4473
bool empty() const
Definition VPlan.h:4453
const_iterator end() const
Definition VPlan.h:4445
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4468
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4463
reverse_iterator rbegin()
Definition VPlan.h:4447
friend class VPlan
Definition VPlan.h:4408
size_t size() const
Definition VPlan.h:4452
const_reverse_iterator rend() const
Definition VPlan.h:4450
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:3007
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:3012
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2968
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:3002
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3024
VPBlendRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:2989
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2987
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:3018
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2998
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:315
VPRegionBlock * getParent()
Definition VPlan.h:186
VPBlocksTy & getPredecessors()
Definition VPlan.h:223
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:220
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:385
void setName(const Twine &newName)
Definition VPlan.h:179
size_t getNumSuccessors() const
Definition VPlan.h:237
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:219
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:217
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:337
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:173
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:273
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:350
size_t getNumPredecessors() const
Definition VPlan.h:238
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:306
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:258
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:343
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:215
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:171
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:230
const VPRegionBlock * getParent() const
Definition VPlan.h:187
const std::string & getName() const
Definition VPlan.h:177
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:325
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:263
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:297
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:233
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:257
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:322
friend class VPBlockUtils
Definition VPlan.h:95
unsigned getVPBlockID() const
Definition VPlan.h:184
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:364
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:329
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:163
VPBlocksTy & getSuccessors()
Definition VPlan.h:212
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:250
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:286
void setParent(VPRegionBlock *P)
Definition VPlan.h:197
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:279
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:227
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:211
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3522
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3506
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3530
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3503
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4601
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4598
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4602
bool hasNUW() const
Definition VPlan.h:4604
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4092
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:4086
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:4104
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:4098
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4111
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:4228
VPValue * getIndex() const
Definition VPlan.h:4225
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:4227
VPIRValue * getStartValue() const
Definition VPlan.h:4224
VPValue * getStepValue() const
Definition VPlan.h:4226
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4216
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4209
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:4200
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4231
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:4193
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4029
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:4034
VPExpandSCEVRecipe(const SCEV *Expr)
const SCEV * getSCEV() const
Definition VPlan.h:4040
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4025
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3679
bool isVectorToScalar() const
Returns true if this VPExpressionRecipe produces a single scalar.
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3661
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3592
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3643
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3631
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3590
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3607
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3611
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3673
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3605
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2436
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2438
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2447
void addBackedgeValue(VPValue *V)
Add V as the incoming value from the loop backedge.
Definition VPlan.h:2491
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2460
static bool classof(const VPValue *V)
Definition VPlan.h:2457
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2483
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2443
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2488
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2472
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2480
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2453
VPValue * getStartValue() const
Definition VPlan.h:2475
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2501
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition VPlan.h:2163
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2176
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2193
unsigned getOpcode() const
Definition VPlan.h:2189
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2168
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4560
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:473
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4584
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4574
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4561
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:498
Class to record and manage LLVM IR flags.
Definition VPlan.h:695
FastMathFlagsTy FMFs
Definition VPlan.h:783
ReductionFlagsTy ReductionFlags
Definition VPlan.h:785
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1038
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:876
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:856
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:842
WrapFlagsTy WrapFlags
Definition VPlan.h:777
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:835
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:1000
bool isReductionOrdered() const
Definition VPlan.h:1064
TruncFlagsTy TruncFlags
Definition VPlan.h:778
CmpInst::Predicate getPredicate() const
Definition VPlan.h:972
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1048
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
uint8_t AllFlags[2]
Definition VPlan.h:786
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1008
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:881
ExactFlagsTy ExactFlags
Definition VPlan.h:780
bool hasNoSignedWrap() const
Definition VPlan.h:1027
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1052
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:847
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:852
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:861
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:830
uint8_t GEPFlagsStorage
Definition VPlan.h:781
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:866
bool isNonNeg() const
Definition VPlan.h:1010
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:990
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:995
DisjointFlagsTy DisjointFlags
Definition VPlan.h:779
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:980
bool hasNoUnsignedWrap() const
Definition VPlan.h:1016
FCmpFlagsTy FCmpFlags
Definition VPlan.h:784
NonNegFlagsTy NonNegFlags
Definition VPlan.h:782
bool isReductionInLoop() const
Definition VPlan.h:1070
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:892
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:929
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:871
uint8_t CmpPredStorage
Definition VPlan.h:776
RecurKind getRecurKind() const
Definition VPlan.h:1058
VPIRFlags(Instruction &I)
Definition VPlan.h:792
Instruction & getInstruction() const
Definition VPlan.h:1752
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1760
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1739
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1766
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1754
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1727
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1171
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1207
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1179
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
VPIRMetadata()=default
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1191
static bool classof(const VPUser *R)
Definition VPlan.h:1570
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1550
Type * getResultType() const
Definition VPlan.h:1588
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1574
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Value *UV=nullptr)
Definition VPlan.h:1541
void execute(VPTransformState &State) override
Generate the instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
bool usesScalars(const VPValue *Op) const override
Cast recipes always use scalars of their operand.
Definition VPlan.h:1591
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1473
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1495
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1404
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1328
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1319
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1332
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1344
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1322
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1269
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1315
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1264
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1261
@ VScale
Returns the value for vscale.
Definition VPlan.h:1348
@ CanonicalIVIncrementForPart
Definition VPlan.h:1245
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1272
bool hasResult() const
Definition VPlan.h:1438
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1498
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1478
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1520
unsigned getOpcode() const
Definition VPlan.h:1417
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1523
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1489
VPInstruction * cloneWithOperands(ArrayRef< VPValue * > NewOperands, Type *ResultTy=nullptr)
Definition VPlan.h:1408
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1463
A common base class for interleaved memory operations.
Definition VPlan.h:3049
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:3112
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3118
static bool classof(const VPUser *U)
Definition VPlan.h:3094
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3061
Instruction * getInsertPos() const
Definition VPlan.h:3116
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3089
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:3114
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3106
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:3135
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3100
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3215
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3209
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3222
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3202
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3189
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:3145
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3172
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3155
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3166
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3147
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
A VPRecipeValue defined by a multi-def recipe, stores a pointer to it.
Definition VPlanValue.h:364
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1607
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1636
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
Definition VPlan.h:1665
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1631
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4551
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1656
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1616
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1641
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1645
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3719
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3730
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3714
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:550
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4752
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenMemIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:420
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:558
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:477
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:526
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:404
const VPBasicBlock * getParent() const
Definition VPlan.h:478
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:531
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:523
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:467
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:337
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3381
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3360
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3384
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3371
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2929
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2915
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2897
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2908
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true if the phi is part of a multi-use reduction.
Definition VPlan.h:2941
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2923
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2878
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2932
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2946
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPReductionPHIRecipe * cloneWithOperands(VPValue *Start, VPValue *BackedgeValue)
Definition VPlan.h:2890
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2938
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2926
A recipe to represent in-loop, ordered or partial reduction operations.
Definition VPlan.h:3238
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3247
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3323
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3292
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3307
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3334
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3336
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3319
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3272
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3321
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3278
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3325
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3332
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3327
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3286
static bool classof(const VPUser *U)
Definition VPlan.h:3297
static bool classof(const VPValue *VPV)
Definition VPlan.h:3302
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3341
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4617
const VPBlockBase * getEntry() const
Definition VPlan.h:4661
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4693
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4678
VPBlockBase * getExiting()
Definition VPlan.h:4674
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4732
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4666
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4737
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4742
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4745
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4729
const VPBlockBase * getExiting() const
Definition VPlan.h:4673
VPBlockBase * getEntry()
Definition VPlan.h:4662
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4686
friend class VPlan
Definition VPlan.h:4618
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4657
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3403
bool isSingleScalar() const
Definition VPlan.h:3461
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3411
~VPReplicateRecipe() override=default
static Type * computeScalarType(const Instruction *I, ArrayRef< VPValue * > Operands)
Compute the scalar result type for a VPReplicateRecipe wrapping I with Operands (excluding any predic...
VPReplicateRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:3435
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3473
operand_range operandsWithoutMask()
Return the recipe's operands, excluding the mask of a predicated recipe.
Definition VPlan.h:3486
bool isPredicated() const
Definition VPlan.h:3463
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3433
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3466
unsigned getOpcode() const
Definition VPlan.h:3490
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3480
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4323
VPValue * getStepValue() const
Definition VPlan.h:4296
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4290
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4263
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4309
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4275
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4304
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4300
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4256
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4317
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:609
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:615
static bool classof(const VPValue *V)
Definition VPlan.h:667
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:680
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:624
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:683
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, Value *UV=nullptr, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:619
static bool classof(const VPUser *U)
Definition VPlan.h:672
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:611
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:169
This class can be used to assign names to VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1527
operand_range operands()
Definition VPlanValue.h:457
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:430
unsigned getNumOperands() const
Definition VPlanValue.h:424
operand_iterator op_end()
Definition VPlanValue.h:455
operand_iterator op_begin()
Definition VPlanValue.h:453
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:425
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:405
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:451
virtual bool usesScalars(const VPValue *Op) const
Returns true if the VPUser uses scalars of operand Op.
Definition VPlanValue.h:464
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:450
void addOperand(VPValue *Operand)
Definition VPlanValue.h:410
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Definition VPlan.cpp:149
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:208
unsigned getNumUsers() const
Definition VPlanValue.h:115
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2306
VPValue * getVFValue() const
Definition VPlan.h:2287
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2284
int64_t getStride() const
Definition VPlan.h:2285
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2327
VPValue * getOffset() const
Definition VPlan.h:2288
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2320
void addOffset(VPValue *Offset)
Append Offset as the offset operand.
Definition VPlan.h:2298
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2274
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2313
VPValue * getPointer() const
Definition VPlan.h:2286
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
void addPerPartOffset(VPValue *VFxPart)
Add the per-part offset (VFxPart) used for unrolled parts > 0.
Definition VPlan.h:2368
VPValue * getStride() const
Definition VPlan.h:2361
Type * getSourceElementType() const
Definition VPlan.h:2376
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2378
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2385
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2352
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2402
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2392
VPValue * getVFxPart() const
Definition VPlan.h:2363
A recipe for widening Call instructions using library calls.
Definition VPlan.h:2097
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:2104
const_operand_range args() const
Definition VPlan.h:2145
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2123
operand_range args()
Definition VPlan.h:2144
Function * getCalledScalarFunction() const
Definition VPlan.h:2140
~VPWidenCallRecipe() override=default
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV, const VPIRFlags::WrapFlagsTy &Flags={false, false})
Definition VPlan.h:4130
~VPWidenCanonicalIVRecipe() override=default
VPValue * getStepValue() const
Definition VPlan.h:4163
void addPerPartStep(VPValue *Step)
Add the per-part step (VF * Part) used for unrolled parts.
Definition VPlan.h:4168
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:4152
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:4159
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4137
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4147
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1878
Instruction::CastOps getOpcode() const
Definition VPlan.h:1914
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1883
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1899
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2236
Type * getSourceElementType() const
Definition VPlan.h:2241
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2244
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2227
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), GetElementPtrInst *UV=nullptr)
Definition VPlan.h:2210
void execute(VPTransformState &State) override=0
Generate the phi nodes.
ArrayRef< const SCEVPredicate * > getNoWrapPredicates() const
Returns the SCEV predicates associated with this induction.
Definition VPlan.h:2590
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2609
static bool classof(const VPValue *V)
Definition VPlan.h:2552
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2571
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2594
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2564
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2579
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2582
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2520
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2567
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2526
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2587
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2601
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2547
const VPValue * getVFValue() const
Definition VPlan.h:2574
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2557
const VPValue * getStepValue() const
Definition VPlan.h:2568
void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart)
After unrolling, append the splat-VF step (VF * step) and the value of the induction at the last unro...
Definition VPlan.h:2535
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2670
const TruncInst * getTruncInst() const
Definition VPlan.h:2686
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2664
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2674
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2656
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2630
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2685
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2639
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2696
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2681
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1925
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1975
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:2029
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:2035
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1961
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:2041
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2011
static bool classof(const VPValue *V)
Definition VPlan.h:2006
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1986
VPWidenIntrinsicRecipe(const unsigned char SC, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1939
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:2038
~VPWidenIntrinsicRecipe() override=default
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1996
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
static bool classof(const VPUser *U)
Definition VPlan.h:2001
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
~VPWidenMemIntrinsicRecipe() override=default
VPWidenMemIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2074
VPWidenMemIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2060
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
A common mixin class for widening memory operations.
Definition VPlan.h:3746
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3757
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3782
virtual ~VPWidenMemoryRecipe()=default
Instruction & Ingredient
Definition VPlan.h:3748
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition VPlan.h:3804
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3754
virtual const VPRecipeBase * getAsRecipe() const =0
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3792
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3751
VPWidenMemoryRecipe(Instruction &I, bool Consecutive, const VPIRMetadata &Metadata)
Definition VPlan.h:3769
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3788
void setMask(VPValue *Mask)
Definition VPlan.h:3759
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3799
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3785
A recipe for widened phis.
Definition VPlan.h:2754
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2796
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2774
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2761
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2723
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2732
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2713
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1817
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1838
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1867
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1821
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1828
~VPWidenRecipe() override=default
VPWidenRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:1840
unsigned getOpcode() const
Definition VPlan.h:1857
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4765
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:5090
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1177
friend class VPSlotTracker
Definition VPlan.h:4767
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1153
bool hasVF(ElementCount VF) const
Definition VPlan.h:4988
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:5001
const DataLayout & getDataLayout() const
Definition VPlan.h:4970
LLVMContext & getContext() const
Definition VPlan.h:4966
VPBasicBlock * getEntry()
Definition VPlan.h:4861
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:5178
void setName(const Twine &newName)
Definition VPlan.h:5034
bool hasScalableVF() const
Definition VPlan.h:4989
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4924
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4945
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4995
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:902
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:885
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:5052
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:910
const VPBasicBlock * getEntry() const
Definition VPlan.h:4862
friend class VPlanPrinter
Definition VPlan.h:4766
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:5061
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:5084
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4964
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:5067
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:5139
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1312
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:5093
bool hasUF(unsigned UF) const
Definition VPlan.h:5013
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4914
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4842
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4954
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4951
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:5038
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:5127
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:5064
void setVF(ElementCount VF)
Definition VPlan.h:4976
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:5029
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:5161
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1050
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1083
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:5016
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:5078
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4899
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4931
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4938
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4890
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4850
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:5116
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1318
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4983
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:5058
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4866
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1183
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4961
bool hasScalarVFOnly() const
Definition VPlan.h:5006
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4904
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:920
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1136
void addVF(ElementCount VF)
Definition VPlan.h:4974
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4910
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1092
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4957
void setUF(unsigned UF)
Definition VPlan.h:5021
const VPSymbolicValue & getVF() const
Definition VPlan.h:4958
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:5171
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1224
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5072
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
CastInfo helper for casting from VPRecipeBase to a mixin class that is not part of the VPRecipeBase c...
Definition VPlan.h:4336
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1113
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2851
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
Type * toScalarizedTy(Type *Ty)
A helper for converting vectorized types to scalarized (non-vector) types.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:89
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isPointerTy(const Type *T)
Definition SPIRVUtils.h:380
LLVM_ABI Type * computeScalarTypeForInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands)
Compute the scalar result type for an IR Opcode given Operands.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2849
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:74
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2843
Possible variants of a reduction.
Definition VPlan.h:2841
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2846
unsigned VFScaleFactor
Definition VPlan.h:2847
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:264
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2812
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2824
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2803
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:727
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:732
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:722
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:715
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1785
PHINode & getIRPhi()
Definition VPlan.h:1798
VPIRPhi(PHINode &PN)
Definition VPlan.h:1786
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1788
static bool classof(const VPUser *U)
Definition VPlan.h:1793
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1809
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
static bool classof(const VPUser *U)
Definition VPlan.h:1685
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1700
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1715
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1695
static bool classof(const VPValue *V)
Definition VPlan.h:1690
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="", Type *ResultTy=nullptr)
Definition VPlan.h:1680
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1117
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1158
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1129
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1118
static bool classof(const VPValue *V)
Definition VPlan.h:1151
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1123
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1146
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3860
VPWidenLoadEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3870
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3877
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3861
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3887
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3810
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3811
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3836
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3819
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadRecipe.
Definition VPlan.h:3830
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3963
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3979
VPWidenStoreEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3972
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3964
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3992
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3982
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3909
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3910
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3927
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3918
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreRecipe.
Definition VPlan.h:3933
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3939
static VPMixin * castFailed()
Definition VPlan.h:4354
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4345
static VPMixin * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4348