LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class SCEVPredicate;
61class Type;
62class VPBasicBlock;
63class VPBuilder;
64class VPDominatorTree;
65class VPRegionBlock;
66class VPlan;
67class VPLane;
69class Value;
71
72struct VPCostContext;
73
74using VPlanPtr = std::unique_ptr<VPlan>;
75
76/// \enum UncountableExitStyle
77/// Different methods of handling early exits.
78///
81 /// No side effects to worry about, so we can process any uncountable exits
82 /// in the loop and branch either to the middle block if the trip count was
83 /// reached, or an early exit block to determine which exit was taken.
85 /// All memory operations other than the load(s) required to determine whether
86 /// an uncountable exit occurred will be masked based on that condition. If an
87 /// uncountable exit is taken, then all lanes before the exiting lane will
88 /// complete, leaving just the final lane to execute in the scalar tail.
90};
91
92/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
93/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
95 friend class VPBlockUtils;
96
97 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
98
99 /// An optional name for the block.
100 std::string Name;
101
102 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
103 /// it is a topmost VPBlockBase.
104 VPRegionBlock *Parent = nullptr;
105
106 /// List of predecessor blocks.
108
109 /// List of successor blocks.
111
112 /// VPlan containing the block. Can only be set on the entry block of the
113 /// plan.
114 VPlan *Plan = nullptr;
115
116 /// Append the non-null block \p Successor to the end of the successor list.
117 void appendSuccessor(VPBlockBase *Successor) {
118 assert(Successor != nullptr && "Cannot add nullptr successor!");
119 Successors.emplace_back(Successor);
120 }
121
122 /// Append the non-null block \p Predecessor to the end of the predecessor list.
123 void appendPredecessor(VPBlockBase *Predecessor) {
124 assert(Predecessor != nullptr && "Cannot add nullptr predecessor!");
125 Predecessors.emplace_back(Predecessor);
126 }
127
128 /// Remove \p Predecessor, which must be present, from the predecessor list.
129 void removePredecessor(VPBlockBase *Predecessor) {
130 auto Pos = find(Predecessors, Predecessor);
131 assert(Pos != Predecessors.end() && "Predecessor does not exist");
132 Predecessors.erase(Pos);
133 }
134
135 /// Remove \p Successor, which must be present, from the successor list.
136 void removeSuccessor(VPBlockBase *Successor) {
137 auto Pos = find(Successors, Successor);
138 assert(Pos != Successors.end() && "Successor does not exist");
139 Successors.erase(Pos);
140 }
141
142 /// Replace predecessor \p Old with \p New, keeping its position in the list.
143 /// \p Old must be a predecessor of this block with the same parent as \p New.
144 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
145 auto I = find(Predecessors, Old);
146 assert(I != Predecessors.end() && "Old is not a predecessor of this block");
147 assert(Old->getParent() == New->getParent() &&
148 "replaced predecessor must have the same parent");
149 *I = New;
150 }
151
152 /// Replace successor \p Old with \p New, keeping its position in the list.
153 /// \p Old must be a successor of this block with the same parent as \p New.
154 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
155 auto I = find(Successors, Old);
156 assert(I != Successors.end() && "Old is not a successor of this block");
157 assert(Old->getParent() == New->getParent() &&
158 "replaced successor must have the same parent");
159 *I = New;
160 }
161
162protected:
163 VPBlockBase(const unsigned char SC, const std::string &N)
164 : SubclassID(SC), Name(N) {}
165
166public:
167 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
168 /// that are actually instantiated. Values of this enumeration are kept in the
169 /// SubclassID field of the VPBlockBase objects. They are used for concrete
170 /// type identification.
171 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
172
174
175 virtual ~VPBlockBase() = default;
176
177 const std::string &getName() const { return Name; }
178
179 void setName(const Twine &newName) { Name = newName.str(); }
180
181 /// \return an ID for the concrete type of this object.
182 /// This is used to implement the classof checks. This should not be used
183 /// for any other purpose, as the values may change as LLVM evolves.
184 unsigned getVPBlockID() const { return SubclassID; }
185
186 VPRegionBlock *getParent() { return Parent; }
187 const VPRegionBlock *getParent() const { return Parent; }
188
189 /// \return A pointer to the plan containing the current block.
190 VPlan *getPlan();
191 const VPlan *getPlan() const;
192
193 /// Sets the pointer of the plan containing the block. The block must be the
194 /// entry block into the VPlan.
195 void setPlan(VPlan *ParentPlan);
196
197 void setParent(VPRegionBlock *P) { Parent = P; }
198
199 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
200 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
201 /// VPBlockBase is a VPBasicBlock, it is returned.
202 const VPBasicBlock *getEntryBasicBlock() const;
203 VPBasicBlock *getEntryBasicBlock();
204
205 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
206 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
207 /// VPBlockBase is a VPBasicBlock, it is returned.
208 const VPBasicBlock *getExitingBasicBlock() const;
209 VPBasicBlock *getExitingBasicBlock();
210
211 const VPBlocksTy &getSuccessors() const { return Successors; }
212 VPBlocksTy &getSuccessors() { return Successors; }
213
214 /// Returns true if this block has any successors.
215 bool hasSuccessors() const { return !Successors.empty(); }
216 /// Returns true if this block has any predecessors.
217 bool hasPredecessors() const { return !Predecessors.empty(); }
218
221
222 const VPBlocksTy &getPredecessors() const { return Predecessors; }
223 VPBlocksTy &getPredecessors() { return Predecessors; }
224
225 /// \return the successor of this VPBlockBase if it has a single successor.
226 /// Otherwise return a null pointer.
228 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
229 }
230
231 /// \return the predecessor of this VPBlockBase if it has a single
232 /// predecessor. Otherwise return a null pointer.
234 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
235 }
236
237 size_t getNumSuccessors() const { return Successors.size(); }
238 size_t getNumPredecessors() const { return Predecessors.size(); }
239
240 /// An Enclosing Block of a block B is any block containing B, including B
241 /// itself. \return the closest enclosing block starting from "this", which
242 /// has successors. \return the root enclosing block if all enclosing blocks
243 /// have no successors.
244 VPBlockBase *getEnclosingBlockWithSuccessors();
245
246 /// \return the closest enclosing block starting from "this", which has
247 /// predecessors. \return the root enclosing block if all enclosing blocks
248 /// have no predecessors.
249 VPBlockBase *getEnclosingBlockWithPredecessors();
250
251 /// \return the successors either attached directly to this VPBlockBase or, if
252 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
253 /// successors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has successors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) successors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithSuccessors()->getSuccessors();
259 }
260
261 /// \return the hierarchical successor of this VPBlockBase if it has a single
262 /// hierarchical successor. Otherwise return a null pointer.
264 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
265 }
266
267 /// \return the predecessors either attached directly to this VPBlockBase or,
268 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
269 /// predecessors of its own, search recursively for the first enclosing
270 /// VPRegionBlock that has predecessors and return them. If no such
271 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
272 /// VPBlockBase reached.
274 return getEnclosingBlockWithPredecessors()->getPredecessors();
275 }
276
277 /// \return the hierarchical predecessor of this VPBlockBase if it has a
278 /// single hierarchical predecessor. Otherwise return a null pointer.
282
283 /// Set a given VPBlockBase \p Successor as the single successor of this
284 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
285 /// This VPBlockBase must have no successors.
287 assert(Successors.empty() && "Setting one successor when others exist.");
288 assert(Successor->getParent() == getParent() &&
289 "connected blocks must have the same parent");
290 appendSuccessor(Successor);
291 }
292
293 /// Install \p IfTrue and \p IfFalse, in that order, as the two successors of
294 /// this VPBlockBase, which must not have any successors yet. Only the
295 /// successor list of this block is updated: this VPBlockBase is not added as
296 /// predecessor of \p IfTrue or \p IfFalse.
297 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
298 assert(!hasSuccessors() && "Setting two successors when others exist.");
299 appendSuccessor(IfTrue);
300 appendSuccessor(IfFalse);
301 }
302
303 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
304 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
305 /// as successor of any VPBasicBlock in \p NewPreds.
307 assert(Predecessors.empty() && "Block predecessors already set.");
308 for (auto *Pred : NewPreds)
309 appendPredecessor(Pred);
310 }
311
312 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
313 /// This VPBlockBase must have no successors. This VPBlockBase is not added
314 /// as predecessor of any VPBasicBlock in \p NewSuccs.
316 assert(Successors.empty() && "Block successors already set.");
317 for (auto *Succ : NewSuccs)
318 appendSuccessor(Succ);
319 }
320
321 /// Remove all the predecessors of this block.
322 void clearPredecessors() { Predecessors.clear(); }
323
324 /// Remove all the successors of this block.
325 void clearSuccessors() { Successors.clear(); }
326
327 /// Swap predecessors of the block. The block must have exactly 2
328 /// predecessors.
330 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
331 std::swap(Predecessors[0], Predecessors[1]);
332 }
333
334 /// Swap successors of the block. The block must have exactly 2 successors.
335 // TODO: This should be part of introducing conditional branch recipes rather
336 // than being independent.
338 assert(Successors.size() == 2 && "must have 2 successors to swap");
339 std::swap(Successors[0], Successors[1]);
340 }
341
342 /// Returns the position of the unique occurrence of \p Pred in Predecessors.
343 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
344 assert(count(Predecessors, Pred) == 1 &&
345 "must have Pred exactly once in Predecessors");
346 return find(Predecessors, Pred) - Predecessors.begin();
347 }
348
349 /// Returns the position of the unique occurrence of \p Succ in Successors.
350 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
351 assert(count(Successors, Succ) == 1 &&
352 "must have Succ exactly once in Successors");
353 return find(Successors, Succ) - Successors.begin();
354 }
355
356 /// The method which generates the output IR that correspond to this
357 /// VPBlockBase, thereby "executing" the VPlan.
358 virtual void execute(VPTransformState *State) = 0;
359
360 /// Return the cost of the block.
362
363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
364 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
365 OS << getName();
366 }
367
368 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
369 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
370 /// consecutive numbers.
371 ///
372 /// Note that the numbering is applied to the whole VPlan, so printing
373 /// individual blocks is consistent with the whole VPlan printing.
374 virtual void print(raw_ostream &O, const Twine &Indent,
375 VPSlotTracker &SlotTracker) const = 0;
376
377 /// Print plain-text dump of this VPBlockBase to \p O.
378 void print(raw_ostream &O) const;
379
380 /// Print the successors of this block to \p O, prefixing all lines with \p
381 /// Indent.
382 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
383
384 /// Dump this VPBlockBase to dbgs().
385 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
386#endif
387
388 /// Clone the current block and its recipes without updating the operands of
389 /// the cloned recipes, including all blocks in the single-entry single-exit
390 /// region for VPRegionBlocks.
391 virtual VPBlockBase *clone() = 0;
392};
393
394/// VPRecipeBase is a base class modeling a sequence of one or more output IR
395/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
396/// and is responsible for deleting its defined values. Single-value
397 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from
398 /// both VPRecipeBase and VPValue separately.
400 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
401 public VPDef,
402 public VPUser {
403 friend VPBasicBlock;
404 friend class VPBlockUtils;
405
406 /// Subclass identifier (for isa/dyn_cast).
407 const unsigned char SubclassID;
408
409 /// Each VPRecipe belongs to a single VPBasicBlock.
410 VPBasicBlock *Parent = nullptr;
411
412 /// The debug location for the recipe.
413 DebugLoc DL;
414
415public:
416 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
417 /// that is actually instantiated. Values of this enumeration are kept in the
418 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
419 /// type identification.
420 using VPRecipeTy = enum {
421 VPBranchOnMaskSC,
422 VPDerivedIVSC,
423 VPExpandSCEVSC,
424 VPExpressionSC,
425 VPIRInstructionSC,
426 VPInstructionSC,
427 VPInterleaveEVLSC,
428 VPInterleaveSC,
429 VPReductionEVLSC,
430 VPReductionSC,
431 VPReplicateSC,
432 VPScalarIVStepsSC,
433 VPVectorPointerSC,
434 VPVectorEndPointerSC,
435 VPWidenCallSC,
436 VPWidenCanonicalIVSC,
437 VPWidenCastSC,
438 VPWidenGEPSC,
439 VPWidenIntrinsicSC,
440 VPWidenMemIntrinsicSC,
441 VPWidenLoadEVLSC,
442 VPWidenLoadSC,
443 VPWidenStoreEVLSC,
444 VPWidenStoreSC,
445 VPWidenSC,
446 VPBlendSC,
447 VPHistogramSC,
448 // START: Phi-like recipes. Need to be kept together.
449 VPWidenPHISC,
450 VPPredInstPHISC,
451 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
452 // VPHeaderPHIRecipe need to be kept together.
453 VPCurrentIterationPHISC,
454 VPActiveLaneMaskPHISC,
455 VPFirstOrderRecurrencePHISC,
456 VPWidenIntOrFpInductionSC,
457 VPWidenPointerInductionSC,
458 VPReductionPHISC,
459 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
460 // END: Phi-like recipes
461 VPFirstPHISC = VPWidenPHISC,
462 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
463 VPLastHeaderPHISC = VPReductionPHISC,
464 VPLastPHISC = VPReductionPHISC,
465 };
466
467 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
469 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
470
471 ~VPRecipeBase() override = default;
472
473 /// Clone the current recipe.
474 virtual VPRecipeBase *clone() = 0;
475
476 /// \return the VPBasicBlock which this VPRecipe belongs to.
477 VPBasicBlock *getParent() { return Parent; }
478 const VPBasicBlock *getParent() const { return Parent; }
479
480 /// \return the VPRegionBlock which the recipe belongs to.
481 VPRegionBlock *getRegion();
482 const VPRegionBlock *getRegion() const;
483
484 /// The method which generates the output IR instructions that correspond to
485 /// this VPRecipe, thereby "executing" the VPlan.
486 virtual void execute(VPTransformState &State) = 0;
487
488 /// Return the cost of this recipe, taking into account if the cost
489 /// computation should be skipped and the ForceTargetInstructionCost flag.
490 /// Also takes care of printing the cost for debugging.
492
493 /// Insert an unlinked recipe into a basic block immediately before
494 /// the specified recipe.
495 void insertBefore(VPRecipeBase *InsertPos);
496 /// Insert an unlinked recipe into \p BB immediately before the insertion
497 /// point \p IP.
498 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
499
500 /// Insert an unlinked Recipe into a basic block immediately after
501 /// the specified Recipe.
502 void insertAfter(VPRecipeBase *InsertPos);
503
504 /// Unlink this recipe from its current VPBasicBlock and insert it into
505 /// the VPBasicBlock that MovePos lives in, right after MovePos.
506 void moveAfter(VPRecipeBase *MovePos);
507
508 /// Unlink this recipe and insert into BB before I.
509 ///
510 /// \pre I is a valid iterator into BB.
511 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
512
513 /// This method unlinks 'this' from the containing basic block, but does not
514 /// delete it.
515 void removeFromParent();
516
517 /// This method unlinks 'this' from the containing basic block and deletes it.
518 ///
519 /// \returns an iterator pointing to the element after the erased one
521
522 /// \return an ID for the concrete type of this object.
523 unsigned getVPRecipeID() const { return SubclassID; }
524
525 /// Method to support type inquiry through isa, cast, and dyn_cast.
526 static inline bool classof(const VPDef *D) {
527 // All VPDefs are also VPRecipeBases.
528 return true;
529 }
530
531 static inline bool classof(const VPUser *U) { return true; }
532
533 /// Returns true if the recipe may have side-effects.
534 bool mayHaveSideEffects() const;
535
536 /// Return true if we can safely execute this recipe unconditionally even if
537 /// it is masked originally.
538 bool isSafeToSpeculativelyExecute() const;
539
540 /// Returns true for PHI-like recipes.
541 bool isPhi() const;
542
543 /// Returns true if the recipe may read from memory.
544 bool mayReadFromMemory() const;
545
546 /// Returns true if the recipe may write to memory.
547 bool mayWriteToMemory() const;
548
549 /// Returns true if the recipe may read from or write to memory.
550 bool mayReadOrWriteMemory() const {
552 }
553
554 /// Returns the debug location of the recipe.
555 DebugLoc getDebugLoc() const { return DL; }
556
557 /// Set the recipe's debug location to \p NewDL.
558 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
559
560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561 /// Dump the recipe to stderr (for debugging).
562 LLVM_ABI_FOR_TEST void dump() const;
563
564 /// Print the recipe, delegating to printRecipe().
565 void print(raw_ostream &O, const Twine &Indent,
567#endif
568
569protected:
570 /// Compute the cost of this recipe either using a recipe's specialized
571 /// implementation or using the legacy cost model and the underlying
572 /// instructions.
573 virtual InstructionCost computeCost(ElementCount VF,
574 VPCostContext &Ctx) const;
575
576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
577 /// Each concrete VPRecipe prints itself, without printing common information,
578 /// like debug info or metadata.
579 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
580 VPSlotTracker &SlotTracker) const = 0;
581#endif
582};
583
584// Helper macro to define common classof implementations for recipes.
// It expands to four static classof overloads, taking a VPRecipeBase, a
// VPValue, a VPUser or a VPSingleDefRecipe. Each returns true only if the
// recipe reached from the argument has the ID VPRecipeID. The VPValue overload
// looks the recipe up via getDefiningRecipe() and the VPUser overload via
// dyn_cast<VPRecipeBase>; both return false when that lookup yields null.
585#define VP_CLASSOF_IMPL(VPRecipeID) \
586 static inline bool classof(const VPRecipeBase *R) { \
587 return R->getVPRecipeID() == VPRecipeID; \
588 } \
589 static inline bool classof(const VPValue *V) { \
590 auto *R = V->getDefiningRecipe(); \
591 return R && R->getVPRecipeID() == VPRecipeID; \
592 } \
593 static inline bool classof(const VPUser *U) { \
594 auto *R = dyn_cast<VPRecipeBase>(U); \
595 return R && R->getVPRecipeID() == VPRecipeID; \
596 } \
597 static inline bool classof(const VPSingleDefRecipe *R) { \
598 return R->getVPRecipeID() == VPRecipeID; \
599 }
600
601/// Compute the scalar result type for an IR \p Opcode given \p Operands.
602LLVM_ABI Type *computeScalarTypeForInstruction(unsigned Opcode,
603 ArrayRef<VPValue *> Operands);
604
605/// VPSingleDefRecipe is a base class for recipes that model a sequence of one
606/// or more output IR that define a single result VPValue. Note that
607/// VPSingleDefRecipe must inherit from VPRecipeBase before VPSingleDefValue.
609 public VPSingleDefValue {
610public:
611 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
613 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this) {}
614
615 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
617 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV) {}
618
619 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
620 Type *ResultTy, Value *UV = nullptr,
622 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV, ResultTy) {}
623
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true for the recipe IDs in the first group of case labels and false for
/// those in the second. Every VPRecipeTy enumerator is listed explicitly and
/// there is no default label, so an ID missing from both groups reaches the
/// llvm_unreachable below.
/// NOTE(review): the missing default is presumably intentional so that a newly
/// added recipe ID is diagnosed instead of being silently misclassified.
624 static inline bool classof(const VPRecipeBase *R) {
625 switch (R->getVPRecipeID()) {
626 case VPRecipeBase::VPDerivedIVSC:
627 case VPRecipeBase::VPExpandSCEVSC:
628 case VPRecipeBase::VPExpressionSC:
629 case VPRecipeBase::VPInstructionSC:
630 case VPRecipeBase::VPReductionEVLSC:
631 case VPRecipeBase::VPReductionSC:
632 case VPRecipeBase::VPReplicateSC:
633 case VPRecipeBase::VPScalarIVStepsSC:
634 case VPRecipeBase::VPVectorPointerSC:
635 case VPRecipeBase::VPVectorEndPointerSC:
636 case VPRecipeBase::VPWidenCallSC:
637 case VPRecipeBase::VPWidenCanonicalIVSC:
638 case VPRecipeBase::VPWidenCastSC:
639 case VPRecipeBase::VPWidenGEPSC:
640 case VPRecipeBase::VPWidenIntrinsicSC:
641 case VPRecipeBase::VPWidenMemIntrinsicSC:
642 case VPRecipeBase::VPWidenSC:
643 case VPRecipeBase::VPBlendSC:
644 case VPRecipeBase::VPPredInstPHISC:
645 case VPRecipeBase::VPCurrentIterationPHISC:
646 case VPRecipeBase::VPActiveLaneMaskPHISC:
647 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
648 case VPRecipeBase::VPWidenPHISC:
649 case VPRecipeBase::VPWidenIntOrFpInductionSC:
650 case VPRecipeBase::VPWidenPointerInductionSC:
651 case VPRecipeBase::VPReductionPHISC:
652 case VPRecipeBase::VPWidenLoadEVLSC:
653 case VPRecipeBase::VPWidenLoadSC:
654 return true;
655 case VPRecipeBase::VPBranchOnMaskSC:
656 case VPRecipeBase::VPInterleaveEVLSC:
657 case VPRecipeBase::VPInterleaveSC:
658 case VPRecipeBase::VPIRInstructionSC:
659 case VPRecipeBase::VPWidenStoreEVLSC:
660 case VPRecipeBase::VPWidenStoreSC:
661 case VPRecipeBase::VPHistogramSC:
662 return false;
663 }
664 llvm_unreachable("Unhandled VPRecipeID");
665 }
666
667 static inline bool classof(const VPValue *V) {
668 auto *R = V->getDefiningRecipe();
669 return R && classof(R);
670 }
671
672 static inline bool classof(const VPUser *U) {
673 auto *R = dyn_cast<VPRecipeBase>(U);
674 return R && classof(R);
675 }
676
677 VPSingleDefRecipe *clone() override = 0;
678
679 /// Returns the underlying instruction.
686
687#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
688 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
690#endif
691};
692
693/// Class to record and manage LLVM IR flags.
696 enum class OperationType : unsigned char {
697 Cmp,
698 FCmp,
699 OverflowingBinOp,
700 Trunc,
701 DisjointOp,
702 PossiblyExactOp,
703 GEPOp,
704 FPMathOp,
705 NonNegOp,
706 ReductionOp,
707 Other
708 };
709
710public:
711 struct WrapFlagsTy {
712 char HasNUW : 1;
713 char HasNSW : 1;
714
716 };
717
719 char HasNUW : 1;
720 char HasNSW : 1;
721
723 };
724
729
731 char NonNeg : 1;
732 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
733 };
734
735private:
736 struct ExactFlagsTy {
737 char IsExact : 1;
738 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
739 };
740 struct FastMathFlagsTy {
741 char AllowReassoc : 1;
742 char NoNaNs : 1;
743 char NoInfs : 1;
744 char NoSignedZeros : 1;
745 char AllowReciprocal : 1;
746 char AllowContract : 1;
747 char ApproxFunc : 1;
748
749 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
750 };
751 /// Holds both the predicate and fast-math flags for floating-point
752 /// comparisons.
753 struct FCmpFlagsTy {
754 uint8_t CmpPredStorage;
755 FastMathFlagsTy FMFs;
756 };
757 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
758 struct ReductionFlagsTy {
759 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
760 // additional kinds.
761 unsigned char Kind : 6;
762 // TODO: Derive order/in-loop from plan and remove here.
763 unsigned char IsOrdered : 1;
764 unsigned char IsInLoop : 1;
765 FastMathFlagsTy FMFs;
766
767 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
768 FastMathFlags FMFs)
769 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
770 IsInLoop(IsInLoop), FMFs(FMFs) {}
771 };
772
773 OperationType OpType;
774
775 union {
780 ExactFlagsTy ExactFlags;
783 FastMathFlagsTy FMFs;
784 FCmpFlagsTy FCmpFlags;
785 ReductionFlagsTy ReductionFlags;
787 };
788
789public:
790 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
791
793 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
794 OpType = OperationType::FCmp;
796 FCmp->getPredicate());
797 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
798 FCmpFlags.FMFs = FCmp->getFastMathFlags();
799 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
800 OpType = OperationType::Cmp;
802 Op->getPredicate());
803 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
804 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
805 OpType = OperationType::DisjointOp;
806 DisjointFlags.IsDisjoint = Op->isDisjoint();
807 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
808 OpType = OperationType::OverflowingBinOp;
809 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
810 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
811 OpType = OperationType::Trunc;
812 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
813 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
814 OpType = OperationType::PossiblyExactOp;
815 ExactFlags.IsExact = Op->isExact();
816 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
817 OpType = OperationType::GEPOp;
818 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
819 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
820 "wrap flags truncated");
821 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
822 OpType = OperationType::NonNegOp;
823 NonNegFlags.NonNeg = PNNI->hasNonNeg();
824 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
825 OpType = OperationType::FPMathOp;
826 FMFs = Op->getFastMathFlags();
827 }
828 }
829
830 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
832 assert(getPredicate() == Pred && "predicate truncated");
833 }
834
836 : OpType(OperationType::FCmp), AllFlags() {
838 assert(getPredicate() == Pred && "predicate truncated");
839 FCmpFlags.FMFs = FMFs;
840 }
841
843 : OpType(OperationType::OverflowingBinOp), AllFlags() {
844 this->WrapFlags = WrapFlags;
845 }
846
848 : OpType(OperationType::Trunc), AllFlags() {
849 this->TruncFlags = TruncFlags;
850 }
851
852 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
853 this->FMFs = FMFs;
854 }
855
857 : OpType(OperationType::DisjointOp), AllFlags() {
858 this->DisjointFlags = DisjointFlags;
859 }
860
862 : OpType(OperationType::NonNegOp), AllFlags() {
863 this->NonNegFlags = NonNegFlags;
864 }
865
866 VPIRFlags(ExactFlagsTy ExactFlags)
867 : OpType(OperationType::PossiblyExactOp), AllFlags() {
868 this->ExactFlags = ExactFlags;
869 }
870
872 : OpType(OperationType::GEPOp), AllFlags() {
873 GEPFlagsStorage = GEPFlags.getRaw();
874 }
875
876 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
877 : OpType(OperationType::ReductionOp), AllFlags() {
878 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
879 }
880
882 OpType = Other.OpType;
883 AllFlags[0] = Other.AllFlags[0];
884 AllFlags[1] = Other.AllFlags[1];
885 }
886
887 /// Only keep flags also present in \p Other. \p Other must have the same
888 /// OpType as the current object.
889 void intersectFlags(const VPIRFlags &Other);
890
891 /// Drop all poison-generating flags.
893 // NOTE: This needs to be kept in-sync with
894 // Instruction::dropPoisonGeneratingFlags.
895 switch (OpType) {
896 case OperationType::OverflowingBinOp:
897 WrapFlags.HasNUW = false;
898 WrapFlags.HasNSW = false;
899 break;
900 case OperationType::Trunc:
901 TruncFlags.HasNUW = false;
902 TruncFlags.HasNSW = false;
903 break;
904 case OperationType::DisjointOp:
905 DisjointFlags.IsDisjoint = false;
906 break;
907 case OperationType::PossiblyExactOp:
908 ExactFlags.IsExact = false;
909 break;
910 case OperationType::GEPOp:
911 GEPFlagsStorage = 0;
912 break;
913 case OperationType::FPMathOp:
914 case OperationType::FCmp:
915 case OperationType::ReductionOp:
916 getFMFsRef().NoNaNs = false;
917 getFMFsRef().NoInfs = false;
918 break;
919 case OperationType::NonNegOp:
920 NonNegFlags.NonNeg = false;
921 break;
922 case OperationType::Cmp:
923 case OperationType::Other:
924 break;
925 }
926 }
927
928 /// Apply the IR flags to \p I.
929 void applyFlags(Instruction &I) const {
930 switch (OpType) {
931 case OperationType::OverflowingBinOp:
932 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
933 I.setHasNoSignedWrap(WrapFlags.HasNSW);
934 break;
935 case OperationType::Trunc:
936 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
937 I.setHasNoSignedWrap(TruncFlags.HasNSW);
938 break;
939 case OperationType::DisjointOp:
940 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
941 break;
942 case OperationType::PossiblyExactOp:
943 I.setIsExact(ExactFlags.IsExact);
944 break;
945 case OperationType::GEPOp:
946 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
948 break;
949 case OperationType::FPMathOp:
950 case OperationType::FCmp: {
951 const FastMathFlagsTy &F = getFMFsRef();
952 I.setHasAllowReassoc(F.AllowReassoc);
953 I.setHasNoNaNs(F.NoNaNs);
954 I.setHasNoInfs(F.NoInfs);
955 I.setHasNoSignedZeros(F.NoSignedZeros);
956 I.setHasAllowReciprocal(F.AllowReciprocal);
957 I.setHasAllowContract(F.AllowContract);
958 I.setHasApproxFunc(F.ApproxFunc);
959 break;
960 }
961 case OperationType::NonNegOp:
962 I.setNonNeg(NonNegFlags.NonNeg);
963 break;
964 case OperationType::ReductionOp:
965 llvm_unreachable("reduction ops should not use applyFlags");
966 case OperationType::Cmp:
967 case OperationType::Other:
968 break;
969 }
970 }
971
973 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
974 "recipe doesn't have a compare predicate");
975 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
978 }
979
981 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
982 "recipe doesn't have a compare predicate");
983 if (OpType == OperationType::FCmp)
985 else
987 assert(getPredicate() == Pred && "predicate truncated");
988 }
989
993
994 /// Returns true if the recipe has a comparison predicate.
995 bool hasPredicate() const {
996 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
997 }
998
999 /// Returns true if the recipe has fast-math flags.
1000 bool hasFastMathFlags() const {
1001 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
1002 OpType == OperationType::ReductionOp;
1003 }
1004
1006
1007 /// Returns true if the recipe has non-negative flag.
1008 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1009
1010 bool isNonNeg() const {
1011 assert(OpType == OperationType::NonNegOp &&
1012 "recipe doesn't have a NNEG flag");
1013 return NonNegFlags.NonNeg;
1014 }
1015
1016 bool hasNoUnsignedWrap() const {
1017 switch (OpType) {
1018 case OperationType::OverflowingBinOp:
1019 return WrapFlags.HasNUW;
1020 case OperationType::Trunc:
1021 return TruncFlags.HasNUW;
1022 default:
1023 llvm_unreachable("recipe doesn't have a NUW flag");
1024 }
1025 }
1026
1027 bool hasNoSignedWrap() const {
1028 switch (OpType) {
1029 case OperationType::OverflowingBinOp:
1030 return WrapFlags.HasNSW;
1031 case OperationType::Trunc:
1032 return TruncFlags.HasNSW;
1033 default:
1034 llvm_unreachable("recipe doesn't have a NSW flag");
1035 }
1036 }
1037
1038 bool hasNoWrapFlags() const {
1039 switch (OpType) {
1040 case OperationType::OverflowingBinOp:
1041 case OperationType::Trunc:
1042 return true;
1043 default:
1044 return false;
1045 }
1046 }
1047
1049 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1050 }
1051
1052 bool isDisjoint() const {
1053 assert(OpType == OperationType::DisjointOp &&
1054 "recipe cannot have a disjoing flag");
1055 return DisjointFlags.IsDisjoint;
1056 }
1057
1059 assert(OpType == OperationType::ReductionOp &&
1060 "recipe doesn't have reduction flags");
1061 return static_cast<RecurKind>(ReductionFlags.Kind);
1062 }
1063
1064 bool isReductionOrdered() const {
1065 assert(OpType == OperationType::ReductionOp &&
1066 "recipe doesn't have reduction flags");
1067 return ReductionFlags.IsOrdered;
1068 }
1069
1070 bool isReductionInLoop() const {
1071 assert(OpType == OperationType::ReductionOp &&
1072 "recipe doesn't have reduction flags");
1073 return ReductionFlags.IsInLoop;
1074 }
1075
1076private:
1077 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1078 FastMathFlagsTy &getFMFsRef() {
1079 if (OpType == OperationType::FCmp)
1080 return FCmpFlags.FMFs;
1081 if (OpType == OperationType::ReductionOp)
1082 return ReductionFlags.FMFs;
1083 return FMFs;
1084 }
1085 const FastMathFlagsTy &getFMFsRef() const {
1086 if (OpType == OperationType::FCmp)
1087 return FCmpFlags.FMFs;
1088 if (OpType == OperationType::ReductionOp)
1089 return ReductionFlags.FMFs;
1090 return FMFs;
1091 }
1092
1093public:
1094 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1095 /// otherwise. Opcodes not supporting default flags include compares and
1096 /// ComputeReductionResult.
1097 static VPIRFlags getDefaultFlags(unsigned Opcode);
1098
1099#if !defined(NDEBUG)
1100 /// Returns true if the set flags are valid for \p Opcode.
1101 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1102
1103 /// Returns true if \p Opcode has its required flags set.
1104 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1105#endif
1106
1107#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1108 void printFlags(raw_ostream &O) const;
1109#endif
1110};
1112
1113static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1114
1115/// A pure-virtual common base class for recipes defining a single VPValue and
1116/// using IR flags.
1118 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1119 const VPIRFlags &Flags,
1121 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1122
1123 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1124 Type *ResultTy, const VPIRFlags &Flags,
1126 : VPSingleDefRecipe(SC, Operands, ResultTy, /*UV=*/nullptr, DL),
1127 VPIRFlags(Flags) {}
1128
1129 static inline bool classof(const VPRecipeBase *R) {
1130 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1131 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1132 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1133 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1134 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1135 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1136 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1137 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC ||
1138 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1139 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1140 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1141 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1142 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC ||
1143 R->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1144 }
1145
1146 static inline bool classof(const VPUser *U) {
1147 auto *R = dyn_cast<VPRecipeBase>(U);
1148 return R && classof(R);
1149 }
1150
1151 static inline bool classof(const VPValue *V) {
1152 auto *R = V->getDefiningRecipe();
1153 return R && classof(R);
1154 }
1155
1157
1158 static inline bool classof(const VPSingleDefRecipe *R) {
1159 return classof(static_cast<const VPRecipeBase *>(R));
1160 }
1161
1162 void execute(VPTransformState &State) override = 0;
1163
1164 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1166 VPCostContext &Ctx) const;
1167};
1168
1169/// Helper to manage IR metadata for recipes. It filters out metadata that
1170/// cannot be propagated.
1173
1174public:
1175 VPIRMetadata() = default;
1176
1177 /// Adds metadata that can be preserved from the original instruction
1178 /// \p I.
1180
1181 /// Copy constructor for cloning.
1183
1185
1186 /// Add all metadata to \p I.
1187 void applyMetadata(Instruction &I) const;
1188
1189 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1190 /// already exists, it will be replaced. Otherwise, it will be added.
1191 void setMetadata(unsigned Kind, MDNode *Node) {
1192 auto It =
1193 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1194 return P.first == Kind;
1195 });
1196 if (It != Metadata.end())
1197 It->second = Node;
1198 else
1199 Metadata.emplace_back(Kind, Node);
1200 }
1201
1202 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1203 /// nodes that are common to both.
1204 void intersect(const VPIRMetadata &MD);
1205
1206 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1207 MDNode *getMetadata(unsigned Kind) const {
1208 auto It =
1209 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1210 return It != Metadata.end() ? It->second : nullptr;
1211 }
1212
1213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1214 /// Print metadata with node IDs.
1215 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1216#endif
1217};
1218
1219/// This is a concrete Recipe that models a single VPlan-level instruction.
1220/// While as any Recipe it may generate a sequence of IR instructions when
1221/// executed, these instructions would always form a single-def expression as
1222/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1223/// opcodes can take an optional mask. Masks may be assigned during
1224/// predication.
1226 public VPIRMetadata {
1227public:
1228 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1229 enum {
1231 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1232 // values of a first-order recurrence.
1234 // Creates a mask where each lane is active (true) whilst the current
1235 // counter (first operand + index) is less than the second operand. i.e.
1236 // mask[i] = icmp ult (op0 + i), op1
1237 // The size of the mask returned is VF * Multiplier (UF, third op).
1240 // Represents the incoming loop-invariant alias-mask. All memory accesses
1241 // in the loop must stay within the active lanes.
1244 // Increment the canonical IV separately for each unrolled part.
1246 // Abstract instruction that compares two values and branches. This is
1247 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1250 // Branch with 2 boolean condition operands and 3 successors. If condition
1251 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1252 // successor 1; otherwise branches to successor 2. Expanded after region
1253 // dissolution into: (1) an OR of the two conditions branching to
1254 // middle.split or successor 2, and (2) middle.split branching to successor
1255 // 0 or successor 1 based on condition 0.
1258 /// Given operands of (the same) struct type, creates a struct of fixed-
1259 /// width vectors each containing a struct field of all operands. The
1260 /// number of operands matches the element count of every vector.
1262 /// Creates a fixed-width vector containing all operands. The number of
1263 /// operands matches the vector element count.
1265 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1266 /// abstract VPInstruction whose single defined VPValue represents VF
1267 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1268 /// VPInstructions.
1270 /// Reduce the operands to the final reduction result using the operation
1271 /// specified via the operation's VPIRFlags.
1273 // Extracts the last part of its operand. Removed during unrolling.
1275 // Extracts the last lane of its vector operand, per part.
1277 // Extracts the second-to-last lane from its operand or the second-to-last
1278 // part if it is scalar. In the latter case, the recipe will be removed
1279 // during unrolling.
1281 LogicalAnd, // Non-poison propagating logical And.
1282 LogicalOr, // Non-poison propagating logical Or.
1283 NumActiveLanes, // Counts the number of active lanes in a mask.
1284 // Add an offset in bytes (second operand) to a base pointer (first
1285 // operand). Only generates scalar values (either for the first lane only or
1286 // for all lanes, depending on its uses).
1288 // Add a vector offset in bytes (second operand) to a scalar base pointer
1289 // (first operand).
1291 // Returns a scalar boolean value, which is true if any lane of its
1292 // (boolean) vector operands is true. It produces the reduced value across
1293 // all unrolled iterations. Unrolling will add all copies of its original
1294 // operand as additional operands. AnyOf is poison-safe as all operands
1295 // will be frozen.
1297 // Calculates the first active lane index of the vector predicate operands.
1298 // It produces the lane index across all unrolled iterations. Unrolling will
1299 // add all copies of its original operand as additional operands.
1300 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1301 // result even with operands that are all zeroes.
1303 // Calculates the last active lane index of the vector predicate operands.
1304 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1305 // tail-folding to extract the correct live-out value from the last active
1306 // iteration. It produces the lane index across all unrolled iterations.
1307 // Unrolling will add all copies of its original operand as additional
1308 // operands.
1310 // Returns a reversed vector for the operand.
1312 /// Start vector for reductions with 3 operands: the original start value,
1313 /// the identity value for the reduction and an integer indicating the
1314 /// scaling factor.
1316 /// Extracts a single lane (first operand) from a set of vector operands.
1317 /// The lane specifies an index into a vector formed by combining all vector
1318 /// operands (all operands after the first one).
1320 /// Explicit user for the resume phi of the canonical induction in the main
1321 /// VPlan, used by the epilogue vector loop.
1323 /// Extracts the last active lane from a set of vectors. The first operand
1324 /// is the default value if no lanes in the masks are active. Conceptually,
1325 /// this concatenates all data vectors (odd operands), concatenates all
1326 /// masks (even operands -- ignoring the default value), and returns the
1327 /// last active value from the combined data vector using the combined mask.
1329 /// Compute the exiting value of a wide induction after vectorization, that
1330 /// is the value of the last lane of the induction increment (i.e. its
1331 /// backedge value). Has the wide induction recipe as operand.
1334
1335 // The opcodes below are used for VPInstructionWithType.
1336 // NOTE: VPInstructionWithType classes are also used for:
1337 // 1. All CastInst variants - see createVPInstructionsForVPBB, and other
1338 // cases where createScalarCast, createScalarZExtOrTrunc and
1339 // createScalarSExtOrTrunc are invoked.
1340 // 2. Scalar load instructions - see createVPInstructionsForVPBB.
1341
1342 /// Scale the first operand (vector step) by the second operand
1343 /// (scalar-step). Casts both operands to the result type if needed.
1345 // Creates a step vector starting from 0 to VF with a step of 1.
1347 /// Returns the value for vscale.
1349
1351 };
1352
1353 /// Returns true if this VPInstruction generates scalar values for all lanes.
1354 /// Most VPInstructions generate a single value per part, either vector or
1355 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1356 /// values per all lanes, stemming from an original ingredient. This method
1357 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1358 /// underlying ingredient.
1359 bool doesGeneratePerAllLanes() const;
1360
1361 /// Return the number of operands determined by the opcode of the
1362 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1363 /// cannot be determined directly by the opcode.
1364 unsigned getNumOperandsForOpcode() const;
1365
1366private:
1367 typedef unsigned char OpcodeTy;
1368 OpcodeTy Opcode;
1369
1370 /// An optional name that can be used for the generated IR instruction.
1371 std::string Name;
1372
1373 /// Returns true if we can generate a scalar for the first lane only if
1374 /// needed.
1375 bool canGenerateScalarForFirstLane() const;
1376
1377 /// Utility methods serving execute(): generates a single vector instance of
1378 /// the modeled instruction. \returns the generated value. In some cases an
1379 /// existing value is returned rather than a generated one.
1380 Value *generate(VPTransformState &State);
1381
1382 /// Returns true if the VPInstruction does not need masking.
1383 bool alwaysUnmasked() const {
1384 if (Opcode == VPInstruction::MaskedCond)
1385 return false;
1386
1387 // For now only VPInstructions with underlying values use masks.
1388 // TODO: provide masks to VPInstructions w/o underlying values.
1389 if (!getUnderlyingValue())
1390 return true;
1391
1392 return Instruction::isCast(Opcode) || Opcode == Instruction::PHI ||
1393 Opcode == Instruction::GetElementPtr;
1394 }
1395
1396public:
1397 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1398 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1399 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
1400 Type *ResultTy = nullptr);
1401
1402 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1403
1404 VPInstruction *clone() override {
1406 }
1407
1409 Type *ResultTy = nullptr) {
1410 auto *New = new VPInstruction(Opcode, NewOperands, *this, *this,
1411 getDebugLoc(), Name, ResultTy);
1412 if (getUnderlyingValue())
1413 New->setUnderlyingValue(getUnderlyingInstr());
1414 return New;
1415 }
1416
1417 unsigned getOpcode() const { return Opcode; }
1418
1419 /// Add \p Op as operand of this VPInstruction. Only supported for AnyOf,
1420 /// ComputeReductionResult, BuildVector, BuildStructVector, ExtractLane,
1421 /// ExtractLastActive, FirstActiveLane, LastActiveLane.
1422 void addOperand(VPValue *Op);
1423
1424 /// Generate the instruction.
1425 /// TODO: We currently execute only per-part unless a specific instance is
1426 /// provided.
1427 void execute(VPTransformState &State) override;
1428
1429 /// Return the cost of this VPInstruction.
1430 InstructionCost computeCost(ElementCount VF,
1431 VPCostContext &Ctx) const override;
1432
1433#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1434 /// Print the VPInstruction to dbgs() (for debugging).
1435 LLVM_DUMP_METHOD void dump() const;
1436#endif
1437
1438 bool hasResult() const {
1439 // CallInst may or may not have a result, depending on the called function.
1440 // Conservatively return calls have results for now.
1441 switch (getOpcode()) {
1442 case Instruction::Ret:
1443 case Instruction::UncondBr:
1444 case Instruction::CondBr:
1445 case Instruction::Store:
1446 case Instruction::Switch:
1447 case Instruction::IndirectBr:
1448 case Instruction::Resume:
1449 case Instruction::CatchRet:
1450 case Instruction::Unreachable:
1451 case Instruction::Fence:
1452 case Instruction::AtomicRMW:
1456 return false;
1457 default:
1458 return true;
1459 }
1460 }
1461
1462 /// Returns true if the VPInstruction has a mask operand.
1463 bool isMasked() const {
1464 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1465 // VPInstructions without a fixed number of operands cannot be masked.
1466 if (NumOpsForOpcode == -1u)
1467 return false;
1468 return NumOpsForOpcode + 1 == getNumOperands();
1469 }
1470
1471 /// Returns the number of operands, excluding the mask if the VPInstruction is
1472 /// masked.
1473 unsigned getNumOperandsWithoutMask() const {
1474 return getNumOperands() - isMasked();
1475 }
1476
1477 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1478 void addMask(VPValue *Mask) {
1479 assert(!isMasked() && "recipe is already masked");
1480 if (alwaysUnmasked())
1481 return;
1482 assert(Mask->getScalarType()->isIntegerTy(1) &&
1483 "Mask must be an i1 (vector)");
1484 VPUser::addOperand(Mask);
1485 }
1486
1487 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1488 /// VPInstructions.
1489 VPValue *getMask() const {
1490 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1491 }
1492
1493 /// Returns an iterator range over the operands excluding the mask operand
1494 /// if present.
1501
1502 /// Returns true if the underlying opcode may read from or write to memory.
1503 bool opcodeMayReadOrWriteFromMemory() const;
1504
1505 /// Returns true if the recipe only uses the first lane of operand \p Op.
1506 bool usesFirstLaneOnly(const VPValue *Op) const override;
1507
1508 /// Returns true if the recipe only uses the first part of operand \p Op.
1509 bool usesFirstPartOnly(const VPValue *Op) const override;
1510
1511 /// Returns true if this VPInstruction produces a scalar value from a vector,
1512 /// e.g. by performing a reduction or extracting a lane.
1513 bool isVectorToScalar() const;
1514
1515 /// Returns true if this VPInstruction's operands are single scalars and the
1516 /// result is also a single scalar.
1517 bool isSingleScalar() const;
1518
1519 /// Returns the symbolic name assigned to the VPInstruction.
1520 StringRef getName() const { return Name; }
1521
1522 /// Set the symbolic name for the VPInstruction.
1523 void setName(StringRef NewName) { Name = NewName.str(); }
1524
1525protected:
1526#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1527 /// Print the VPInstruction to \p O.
1528 void printRecipe(raw_ostream &O, const Twine &Indent,
1529 VPSlotTracker &SlotTracker) const override;
1530#endif
1531};
1532
1533/// A specialization of VPInstruction augmenting it with a dedicated result
1534/// type, to be used when the opcode and operands of the VPInstruction don't
1535/// directly determine the result type. Note that there is no separate recipe ID
1536/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1537/// distinguished purely by the opcode.
1538/// TODO: Merge with VPInstruction, now that VPRecipeValue provides the type.
1540public:
1542 Type *ResultTy, const VPIRFlags &Flags = {},
1543 const VPIRMetadata &Metadata = {},
1545 const Twine &Name = "", Value *UV = nullptr)
1546 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {
1548 }
1549
1550 static inline bool classof(const VPRecipeBase *R) {
1551 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1552 // type information.
1553 auto *VPI = dyn_cast<VPInstruction>(R);
1554 if (!VPI)
1555 return false;
1556 unsigned Opc = VPI->getOpcode();
1558 return true;
1559 switch (Opc) {
1563 case Instruction::Load:
1564 return true;
1565 default:
1566 return false;
1567 }
1568 }
1569
1570 static inline bool classof(const VPUser *R) {
1572 }
1573
1574 VPInstruction *clone() override {
1575 auto *New =
1577 *this, *this, getDebugLoc(), getName());
1578 New->setUnderlyingValue(getUnderlyingValue());
1579 return New;
1580 }
1581
1582 void execute(VPTransformState &State) override;
1583
1584 /// Return the cost of this VPInstruction.
1586 VPCostContext &Ctx) const override;
1587
1588 Type *getResultType() const { return getScalarType(); }
1589
1590 /// Cast recipes always use scalars of their operand.
1591 bool usesScalars(const VPValue *Op) const override {
1593 return true;
1595 }
1596
1597protected:
1598#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1599 /// Print the recipe.
1600 void printRecipe(raw_ostream &O, const Twine &Indent,
1601 VPSlotTracker &SlotTracker) const override;
1602#endif
1603};
1604
1605/// Helper type to provide functions to access incoming values and blocks for
1606/// phi-like recipes.
1608protected:
1609 /// Return a VPRecipeBase* to the current object.
1610 virtual const VPRecipeBase *getAsRecipe() const = 0;
1611
1612public:
1613 virtual ~VPPhiAccessors() = default;
1614
1615 /// Returns the incoming VPValue with index \p Idx.
1616 VPValue *getIncomingValue(unsigned Idx) const {
1617 return getAsRecipe()->getOperand(Idx);
1618 }
1619
1620 /// Returns the incoming block with index \p Idx.
1621 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1622
1623 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1624 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1625
1626 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1627 /// block.
1628 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1629
1630 /// Returns the number of incoming values, also number of incoming blocks.
1631 virtual unsigned getNumIncoming() const {
1632 return getAsRecipe()->getNumOperands();
1633 }
1634
1635 /// Returns an iterator range over the incoming values.
1637 return make_range(getAsRecipe()->op_begin(),
1638 getAsRecipe()->op_begin() + getNumIncoming());
1639 }
1640
1642 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1643
1644 /// Returns an iterator range over the incoming blocks.
1646 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1647 return getIncomingBlock(Idx);
1648 };
1649 return map_range(index_range(0, getNumIncoming()), GetBlock);
1650 }
1651
1652 /// Returns an iterator range over pairs of incoming values and corresponding
1653 /// incoming blocks.
1659
1660 /// Removes the incoming value for \p IncomingBlock, which must be a
1661 /// predecessor.
1662 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1663
1664 /// Append \p IncomingV as an incoming value to the phi-like recipe.
1665 void addIncoming(VPValue *IncomingV) {
1666 auto *R = const_cast<VPRecipeBase *>(getAsRecipe());
1667 assert((R->getNumOperands() == 0 ||
1668 IncomingV->getScalarType() == R->getOperand(0)->getScalarType()) &&
1669 "all incoming values must have the same type");
1670 R->addOperand(IncomingV);
1671 }
1672
1673#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1674 /// Print the recipe.
1676#endif
1677};
1678
1681 const Twine &Name = "", Type *ResultTy = nullptr)
1682 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name,
1683 ResultTy) {}
1684
1685 static inline bool classof(const VPUser *U) {
1686 auto *VPI = dyn_cast<VPInstruction>(U);
1687 return VPI && VPI->getOpcode() == Instruction::PHI;
1688 }
1689
1690 static inline bool classof(const VPValue *V) {
1691 auto *VPI = dyn_cast<VPInstruction>(V);
1692 return VPI && VPI->getOpcode() == Instruction::PHI;
1693 }
1694
1695 static inline bool classof(const VPSingleDefRecipe *SDR) {
1696 auto *VPI = dyn_cast<VPInstruction>(SDR);
1697 return VPI && VPI->getOpcode() == Instruction::PHI;
1698 }
1699
1700 VPPhi *clone() override {
1701 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1702 PhiR->setUnderlyingValue(getUnderlyingValue());
1703 return PhiR;
1704 }
1705
1706 void execute(VPTransformState &State) override;
1707
1708protected:
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1710 /// Print the recipe.
1711 void printRecipe(raw_ostream &O, const Twine &Indent,
1712 VPSlotTracker &SlotTracker) const override;
1713#endif
1714
1715 const VPRecipeBase *getAsRecipe() const override { return this; }
1716};
1717
1718/// A recipe to wrap on original IR instruction not to be modified during
1719/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1720 /// Except for PHIs, VPIRInstructions cannot have any operands.
1722 Instruction &I;
1723
1724protected:
1725 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1726 /// subclasses may need to be created, e.g. VPIRPhi.
1728 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1729
1730public:
1731 ~VPIRInstruction() override = default;
1732
1733 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1734 /// VPIRInstruction.
1736
1737 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1738
1740 auto *R = create(I);
1741 for (auto *Op : operands())
1742 R->addOperand(Op);
1743 return R;
1744 }
1745
1746 void execute(VPTransformState &State) override;
1747
1748 /// Return the cost of this VPIRInstruction.
1750 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1751
1752 Instruction &getInstruction() const { return I; }
1753
1754 bool usesScalars(const VPValue *Op) const override {
1756 "Op must be an operand of the recipe");
1757 return true;
1758 }
1759
1760 bool usesFirstPartOnly(const VPValue *Op) const override {
1762 "Op must be an operand of the recipe");
1763 return true;
1764 }
1765
1766 bool usesFirstLaneOnly(const VPValue *Op) const override {
1768 "Op must be an operand of the recipe");
1769 return true;
1770 }
1771
1772protected:
1773#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1774 /// Print the recipe.
1775 void printRecipe(raw_ostream &O, const Twine &Indent,
1776 VPSlotTracker &SlotTracker) const override;
1777#endif
1778};
1779
1780/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1781/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1782/// allowed, and it is used to add a new incoming value for the single
1783/// predecessor VPBB.
1785 public VPPhiAccessors {
1787
1788 static inline bool classof(const VPRecipeBase *U) {
1789 auto *R = dyn_cast<VPIRInstruction>(U);
1790 return R && isa<PHINode>(R->getInstruction());
1791 }
1792
1793 static inline bool classof(const VPUser *U) {
1794 auto *R = dyn_cast<VPRecipeBase>(U);
1795 return R && classof(R);
1796 }
1797
1799
1800 void execute(VPTransformState &State) override;
1801
1802protected:
1803#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1804 /// Print the recipe.
1805 void printRecipe(raw_ostream &O, const Twine &Indent,
1806 VPSlotTracker &SlotTracker) const override;
1807#endif
1808
1809 const VPRecipeBase *getAsRecipe() const override { return this; }
1810};
1811
1812/// VPWidenRecipe is a recipe for producing a widened instruction using the
1813/// opcode and operands of the recipe. This recipe covers most of the
1814/// traditional vectorization cases where each recipe transforms into a
1815/// vectorized version of itself.
1817 public VPIRMetadata {
1818 unsigned Opcode;
1819
1820public:
1822 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1823 DebugLoc DL = {})
1824 : VPWidenRecipe(I.getOpcode(), Operands, Flags, Metadata, DL) {
1825 setUnderlyingValue(&I);
1826 }
1827
1828 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1829 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1830 DebugLoc DL = {})
1831 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands,
1832 computeScalarTypeForInstruction(Opcode, Operands),
1833 Flags, DL),
1834 VPIRMetadata(Metadata), Opcode(Opcode) {}
1835
1836 ~VPWidenRecipe() override = default;
1837
1839
1841 if (auto *UV = getUnderlyingValue())
1842 return new VPWidenRecipe(*cast<Instruction>(UV), NewOperands, *this,
1843 *this, getDebugLoc());
1844 return new VPWidenRecipe(Opcode, NewOperands, *this, *this, getDebugLoc());
1845 }
1846
1847 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1848
1849 /// Produce a widened instruction using the opcode and operands of the recipe,
1850 /// processing State.VF elements.
1851 void execute(VPTransformState &State) override;
1852
1853 /// Return the cost of this VPWidenRecipe.
1854 InstructionCost computeCost(ElementCount VF,
1855 VPCostContext &Ctx) const override;
1856
1857 unsigned getOpcode() const { return Opcode; }
1858
1859protected:
1860#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1861 /// Print the recipe.
1862 void printRecipe(raw_ostream &O, const Twine &Indent,
1863 VPSlotTracker &SlotTracker) const override;
1864#endif
1865
1866 /// Returns true if the recipe only uses the first lane of operand \p Op.
1867 bool usesFirstLaneOnly(const VPValue *Op) const override {
1869 "Op must be an operand of the recipe");
1870 return Opcode == Instruction::Select && Op == getOperand(0) &&
1871 Op->isDefinedOutsideLoopRegions();
1872 }
1873};
1874
1875/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1876/// TODO: Merge with VPWidenRecipe now that type is associated to every
1877/// VPRecipeValue.
1879 /// Cast instruction opcode.
1880 Instruction::CastOps Opcode;
1881
1882public:
1884 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1885 const VPIRMetadata &Metadata = {},
1887 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, ResultTy, Flags,
1888 DL),
1889 VPIRMetadata(Metadata), Opcode(Opcode) {
1890 assert(flagsValidForOpcode(Opcode) &&
1891 "Set flags not supported for the provided opcode");
1893 "Opcode requires specific flags to be set");
1895 }
1896
1897 ~VPWidenCastRecipe() override = default;
1898
1900 return new VPWidenCastRecipe(Opcode, getOperand(0), getScalarType(),
1902 *this, *this, getDebugLoc());
1903 }
1904
1905 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1906
1907 /// Produce widened copies of the cast.
1908 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1909
1910 /// Return the cost of this VPWidenCastRecipe.
1912 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1913
1914 Instruction::CastOps getOpcode() const { return Opcode; }
1915
1916protected:
1917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1918 /// Print the recipe.
1919 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1920 VPSlotTracker &SlotTracker) const override;
1921#endif
1922};
1923
1924/// A recipe for widening vector intrinsics.
1926 /// ID of the vector intrinsic to widen.
1927 Intrinsic::ID VectorIntrinsicID;
1928
1929 /// True if the intrinsic may read from memory.
1930 bool MayReadFromMemory;
1931
1932 /// True if the intrinsic may write to memory.
1933 bool MayWriteToMemory;
1934
1935 /// True if the intrinsic may have side-effects.
1936 bool MayHaveSideEffects;
1937
1938protected:
1939 VPWidenIntrinsicRecipe(const unsigned char SC,
1940 Intrinsic::ID VectorIntrinsicID,
1941 ArrayRef<VPValue *> CallArguments, Type *Ty,
1942 const VPIRFlags &Flags = {},
1943 const VPIRMetadata &MD = {},
1945 : VPRecipeWithIRFlags(SC, CallArguments, Ty, Flags, DL), VPIRMetadata(MD),
1946 VectorIntrinsicID(VectorIntrinsicID) {
1947 LLVMContext &Ctx = Ty->getContext();
1948 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1949 MemoryEffects ME = Attrs.getMemoryEffects();
1950 MayReadFromMemory = !ME.onlyWritesMemory();
1951 MayWriteToMemory = !ME.onlyReadsMemory();
1952 MayHaveSideEffects = MayWriteToMemory ||
1953 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1954 !Attrs.hasAttribute(Attribute::WillReturn);
1955 }
1956
1957 /// Helper function to produce the widened intrinsic call.
1958 CallInst *createVectorCall(VPTransformState &State);
1959
1960public:
1962 ArrayRef<VPValue *> CallArguments, Type *Ty,
1963 const VPIRFlags &Flags = {},
1964 const VPIRMetadata &MD = {},
1966 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, Ty,
1967 Flags, DL),
1968 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID),
1969 MayReadFromMemory(CI.mayReadFromMemory()),
1970 MayWriteToMemory(CI.mayWriteToMemory()),
1971 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1972 setUnderlyingValue(&CI);
1973 }
1974
1976 ArrayRef<VPValue *> CallArguments, Type *Ty,
1977 const VPIRFlags &Flags = {},
1978 const VPIRMetadata &Metadata = {},
1980 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenIntrinsicSC,
1981 VectorIntrinsicID, CallArguments, Ty, Flags,
1982 Metadata, DL) {}
1983
1984 ~VPWidenIntrinsicRecipe() override = default;
1985
1987 if (Value *CI = getUnderlyingValue())
1988 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1989 operands(), getScalarType(), *this,
1990 *this, getDebugLoc());
1991 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(),
1992 getScalarType(), *this, *this,
1993 getDebugLoc());
1994 }
1995
1996 static inline bool classof(const VPRecipeBase *R) {
1997 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1998 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC;
1999 }
2000
2001 static inline bool classof(const VPUser *U) {
2002 auto *R = dyn_cast<VPRecipeBase>(U);
2003 return R && classof(R);
2004 }
2005
2006 static inline bool classof(const VPValue *V) {
2007 auto *R = V->getDefiningRecipe();
2008 return R && classof(R);
2009 }
2010
2011 static inline bool classof(const VPSingleDefRecipe *R) {
2012 return classof(static_cast<const VPRecipeBase *>(R));
2013 }
2014
2015 /// Produce a widened version of the vector intrinsic.
2016 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
2017
2018 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
2021 const VPRecipeWithIRFlags &R,
2022 ElementCount VF, VPCostContext &Ctx);
2023
2024 /// Return the cost of this vector intrinsic.
2026 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
2027
2028 /// Return the ID of the intrinsic.
2029 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
2030
2031 /// Return the name of the intrinsic as a string.
2033
2034 /// Returns true if the intrinsic may read from memory.
2035 bool mayReadFromMemory() const { return MayReadFromMemory; }
2036
2037 /// Returns true if the intrinsic may write to memory.
2038 bool mayWriteToMemory() const { return MayWriteToMemory; }
2039
2040 /// Returns true if the intrinsic may have side-effects.
2041 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
2042
2043 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
2044
2045protected:
2046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2047 /// Print the recipe.
2048 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
2049 VPSlotTracker &SlotTracker) const override;
2050#endif
2051};
2052
2053/// A recipe for widening vector memory intrinsics.
2055 /// Alignment information for this memory access.
2056 Align Alignment;
2057
2058public:
2059 // TODO: support StoreInst for strided store
2061 ArrayRef<VPValue *> CallArguments, Type *Ty,
2062 Align Alignment, const VPIRMetadata &MD = {},
2064 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenMemIntrinsicSC,
2065 VectorIntrinsicID, CallArguments, Ty, {}, MD,
2066 DL),
2067 Alignment(Alignment) {
2068 assert(VectorIntrinsicID == Intrinsic::experimental_vp_strided_load &&
2069 "Unexpected intrinsic");
2070 }
2071
2072 ~VPWidenMemIntrinsicRecipe() override = default;
2073
2076 getScalarType(), Alignment, *this,
2077 getDebugLoc());
2078 }
2079
2080 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenMemIntrinsicSC)
2081
2082 /// Produce a widened version of the vector memory intrinsic.
2083 void execute(VPTransformState &State) override;
2084
2085 /// Helper function for computing the cost of vector memory intrinsic.
2087 bool IsMasked, Align Alignment,
2088 VPCostContext &Ctx);
2089
2090 /// Return the cost of this vector memory intrinsic.
2092 VPCostContext &Ctx) const override;
2093};
2094
2095/// A recipe for widening Call instructions using library calls.
2097 public VPIRMetadata {
2098 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
2099 /// between a given VF and the chosen vectorized variant, so there will be a
2100 /// different VPlan for each VF with a valid variant.
2101 Function *Variant;
2102
2103public:
2105 ArrayRef<VPValue *> CallArguments,
2106 const VPIRFlags &Flags = {},
2107 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2108 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments,
2109 toScalarizedTy(Variant->getReturnType()), Flags,
2110 DL),
2111 VPIRMetadata(Metadata), Variant(Variant) {
2112 setUnderlyingValue(UV);
2113 assert(
2114 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2115 "last operand must be the called function");
2116 assert(cast<Function>(CallArguments.back()->getLiveInIRValue())
2117 ->getReturnType() == getScalarType() &&
2118 "Scalar type must match return type of called scalar function");
2119 }
2120
2121 ~VPWidenCallRecipe() override = default;
2122
2124 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2125 *this, *this, getDebugLoc());
2126 }
2127
2128 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2129
2130 /// Produce a widened version of the call instruction.
2131 void execute(VPTransformState &State) override;
2132
2133 /// Return the cost of this VPWidenCallRecipe.
2134 InstructionCost computeCost(ElementCount VF,
2135 VPCostContext &Ctx) const override;
2136
2137 /// Return the cost of widening a call using the vector function \p Variant.
2138 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2139
2143
2146
2147 /// Returns true if the recipe only uses the first lane of operand \p Op.
2148 bool usesFirstLaneOnly(const VPValue *Op) const override;
2149
2150protected:
2151#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2152 /// Print the recipe.
2153 void printRecipe(raw_ostream &O, const Twine &Indent,
2154 VPSlotTracker &SlotTracker) const override;
2155#endif
2156};
2157
2158/// A recipe representing a sequence of load -> update -> store as part of
2159/// a histogram operation. This means there may be aliasing between vector
2160/// lanes, which is handled by the llvm.experimental.vector.histogram family
2161/// of intrinsics. The only update operations currently supported are
2162/// 'add' and 'sub' where the other term is loop-invariant.
2164 /// Opcode of the update operation, currently either add or sub.
2165 unsigned Opcode;
2166
2167public:
2168 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2169 const VPIRMetadata &Metadata = {},
2171 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2172 VPIRMetadata(Metadata), Opcode(Opcode) {}
2173
2174 ~VPHistogramRecipe() override = default;
2175
2177 return new VPHistogramRecipe(Opcode, operands(), *this, getDebugLoc());
2178 }
2179
2180 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2181
2182 /// Produce a vectorized histogram operation.
2183 void execute(VPTransformState &State) override;
2184
2185 /// Return the cost of this VPHistogramRecipe.
2187 VPCostContext &Ctx) const override;
2188
2189 unsigned getOpcode() const { return Opcode; }
2190
2191 /// Return the mask operand if one was provided, or a null pointer if all
2192 /// lanes should be executed unconditionally.
2193 VPValue *getMask() const {
2194 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2195 }
2196
2197protected:
2198#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2199 /// Print the recipe.
2200 void printRecipe(raw_ostream &O, const Twine &Indent,
2201 VPSlotTracker &SlotTracker) const override;
2202#endif
2203};
2204
2205/// A recipe for handling GEP instructions.
2207 Type *SourceElementTy;
2208
2209public:
2210 VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
2211 const VPIRFlags &Flags = {},
2213 GetElementPtrInst *UV = nullptr)
2214 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands,
2215 Operands[0]->getScalarType(), Flags, DL),
2216 SourceElementTy(SourceElementTy) {
2217 if (UV) {
2218 setUnderlyingValue(UV);
2221 assert(Metadata.empty() && "unexpected metadata on GEP");
2222 }
2223 }
2224
2225 ~VPWidenGEPRecipe() override = default;
2226
2232
2233 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2234
2235 /// This recipe generates a GEP instruction.
2236 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2237
2238 /// Generate the gep nodes.
2239 void execute(VPTransformState &State) override;
2240
2241 Type *getSourceElementType() const { return SourceElementTy; }
2242
2243 /// Return the cost of this VPWidenGEPRecipe.
2245 VPCostContext &Ctx) const override {
2246 // TODO: Compute accurate cost after retiring the legacy cost model.
2247 return 0;
2248 }
2249
2250 /// Returns true if the recipe only uses the first lane of operand \p Op.
2251 bool usesFirstLaneOnly(const VPValue *Op) const override;
2252
2253protected:
2254#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2255 /// Print the recipe.
2256 void printRecipe(raw_ostream &O, const Twine &Indent,
2257 VPSlotTracker &SlotTracker) const override;
2258#endif
2259};
2260
2261/// A recipe to compute a pointer to the last element of each part of a widened
2262/// memory access for widened memory accesses of SourceElementTy. Used for
2263/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2264/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2265/// unroller otherwise.
2267 Type *SourceElementTy;
2268
2269 /// The constant stride of the pointer computed by this recipe, expressed in
2270 /// units of SourceElementTy.
2271 int64_t Stride;
2272
2273public:
2274 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2275 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2276 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2277 Ptr->getScalarType(), GEPFlags, DL),
2278 SourceElementTy(SourceElementTy), Stride(Stride) {
2279 assert(Stride < 0 && "Stride must be negative");
2280 }
2281
2282 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2283
2284 Type *getSourceElementType() const { return SourceElementTy; }
2285 int64_t getStride() const { return Stride; }
2286 VPValue *getPointer() const { return getOperand(0); }
2287 VPValue *getVFValue() const { return getOperand(1); }
2289 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2290 }
2291
2292 /// Adds the offset operand to the recipe.
2293 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2294 void materializeOffset(unsigned Part = 0);
2295
2296 /// Append \p Offset as the offset operand. The offset is an integer index
2297 /// expressed in units of SourceElementTy.
2299 assert(Offset->getScalarType()->isIntegerTy() &&
2300 "offset must be an integer index");
2302 }
2303
2304 void execute(VPTransformState &State) override;
2305
2306 bool usesFirstLaneOnly(const VPValue *Op) const override {
2308 "Op must be an operand of the recipe");
2309 return true;
2310 }
2311
2312 /// Return the cost of this VPVectorEndPointerRecipe.
2314 VPCostContext &Ctx) const override {
2315 // TODO: Compute accurate cost after retiring the legacy cost model.
2316 return 0;
2317 }
2318
2319 /// Returns true if the recipe only uses the first part of operand \p Op.
2320 bool usesFirstPartOnly(const VPValue *Op) const override {
2322 "Op must be an operand of the recipe");
2323 assert(getNumOperands() <= 2 && "must have at most two operands");
2324 return true;
2325 }
2326
2328 auto *VEPR = new VPVectorEndPointerRecipe(
2331 if (auto *Offset = getOffset())
2332 VEPR->addOffset(Offset);
2333 return VEPR;
2334 }
2335
2336protected:
2337#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2338 /// Print the recipe.
2339 void printRecipe(raw_ostream &O, const Twine &Indent,
2340 VPSlotTracker &SlotTracker) const override;
2341#endif
2342};
2343
2344/// A recipe to compute the pointers for widened memory accesses of \p
2345/// SourceElementTy, with the \p Stride expressed in units of \p
2346/// SourceElementTy. Unrolling adds an extra \p VFxPart operand for unrolled
2347/// parts > 0 and it produces `GEP SourceElementTy Ptr, VFxPart * Stride`.
2349 Type *SourceElementTy;
2350
2351public:
2352 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
2353 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2354 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC,
2355 ArrayRef<VPValue *>({Ptr, Stride}),
2356 Ptr->getScalarType(), GEPFlags, DL),
2357 SourceElementTy(SourceElementTy) {}
2358
2359 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2360
2361 VPValue *getStride() const { return getOperand(1); }
2362
2364 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2365 }
2366
2367 /// Add the per-part offset (VFxPart) used for unrolled parts > 0.
2368 void addPerPartOffset(VPValue *VFxPart) {
2369 assert(VFxPart->getScalarType()->isIntegerTy() &&
2370 "per-part offset must be an integer index");
2371 VPUser::addOperand(VFxPart);
2372 }
2373
2374 void execute(VPTransformState &State) override;
2375
2376 Type *getSourceElementType() const { return SourceElementTy; }
2377
2378 bool usesFirstLaneOnly(const VPValue *Op) const override {
2380 "Op must be an operand of the recipe");
2381 return true;
2382 }
2383
2384 /// Returns true if the recipe only uses the first part of operand \p Op.
2385 bool usesFirstPartOnly(const VPValue *Op) const override {
2387 "Op must be an operand of the recipe");
2388 assert(getNumOperands() <= 2 && "must have at most two operands");
2389 return true;
2390 }
2391
2393 auto *Clone =
2394 new VPVectorPointerRecipe(getOperand(0), SourceElementTy, getStride(),
2396 if (auto *VFxPart = getVFxPart())
2397 Clone->addPerPartOffset(VFxPart);
2398 return Clone;
2399 }
2400
2401 /// Return the cost of this VPVectorPointerRecipe.
2403 VPCostContext &Ctx) const override {
2404 // TODO: Compute accurate cost after retiring the legacy cost model.
2405 return 0;
2406 }
2407
2408protected:
2409#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2410 /// Print the recipe.
2411 void printRecipe(raw_ostream &O, const Twine &Indent,
2412 VPSlotTracker &SlotTracker) const override;
2413#endif
2414};
2415
2416/// A pure virtual base class for all recipes modeling header phis, including
2417/// phis for first order recurrences, pointer inductions and reductions. The
2418/// start value is the first operand of the recipe and the incoming value from
2419/// the backedge is the second operand.
2420///
2421/// Inductions are modeled using the following sub-classes:
2422/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2423/// floating point inductions with arbitrary start and step values. Produces
2424/// a vector PHI per-part.
2425/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2426/// pointer induction. Produces either a vector PHI per-part or scalar values
2427/// per-lane based on the canonical induction.
2428/// * VPFirstOrderRecurrencePHIRecipe
2429/// * VPReductionPHIRecipe
2430/// * VPActiveLaneMaskPHIRecipe
2431/// * VPEVLBasedIVPHIRecipe
2432///
2433/// Note that the canonical IV is modeled as a VPRegionValue associated with
2434/// its loop region.
2436 public VPPhiAccessors {
2437protected:
2438 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2439 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2440 : VPHeaderPHIRecipe(VPRecipeID, UnderlyingInstr, Start,
2441 Start->getScalarType(), DL) {}
2442
2443 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2444 VPValue *Start, Type *ResultTy, DebugLoc DL)
2445 : VPSingleDefRecipe(VPRecipeID, Start, ResultTy, UnderlyingInstr, DL) {}
2446
2447 const VPRecipeBase *getAsRecipe() const override { return this; }
2448
2449public:
2450 ~VPHeaderPHIRecipe() override = default;
2451
2452 /// Method to support type inquiry through isa, cast, and dyn_cast.
2453 static inline bool classof(const VPRecipeBase *R) {
2454 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2455 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2456 }
2457 static inline bool classof(const VPValue *V) {
2458 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2459 }
2460 static inline bool classof(const VPSingleDefRecipe *R) {
2461 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2462 }
2463
2464 /// Generate the phi nodes.
2465 void execute(VPTransformState &State) override = 0;
2466
2467 /// Return the cost of this header phi recipe.
2469 VPCostContext &Ctx) const override;
2470
2471 /// Returns the start value of the phi, if one is set.
2473 return getNumOperands() == 0 ? nullptr : getOperand(0);
2474 }
2476 return getNumOperands() == 0 ? nullptr : getOperand(0);
2477 }
2478
2479 /// Update the start value of the recipe.
2481
2482 /// Returns the incoming value from the loop backedge.
2484 return getOperand(1);
2485 }
2486
2487 /// Update the incoming value from the loop backedge.
2489
2490 /// Add \p V as the incoming value from the loop backedge.
2492 assert(getNumOperands() == 1 &&
2493 "backedge value must be appended right after construction");
2494 assert(V->getScalarType() == getScalarType() &&
2495 "backedge value must have the same type as the start value");
2497 }
2498
2499 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2500 /// to be a recipe.
2502 return *getBackedgeValue()->getDefiningRecipe();
2503 }
2504
2505protected:
2506#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2507 /// Print the recipe.
2508 void printRecipe(raw_ostream &O, const Twine &Indent,
2509 VPSlotTracker &SlotTracker) const override = 0;
2510#endif
2511};
2512
2513/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2514/// VPWidenPointerInductionRecipe), providing shared functionality, including
2515/// retrieving the step value, induction descriptor and original phi node.
2517 InductionDescriptor IndDesc;
2518
2519public:
2520 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2521 VPValue *Step, const InductionDescriptor &IndDesc,
2522 DebugLoc DL)
2523 : VPWidenInductionRecipe(Kind, IV, Start, Step, IndDesc,
2524 Start->getScalarType(), DL) {}
2525
2526 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2527 VPValue *Step, const InductionDescriptor &IndDesc,
2528 Type *ResultTy, DebugLoc DL)
2529 : VPHeaderPHIRecipe(Kind, IV, Start, ResultTy, DL), IndDesc(IndDesc) {
2530 addOperand(Step);
2531 }
2532
2533 /// After unrolling, append the splat-VF step (`VF * step`) and the value of
2534 /// the induction at the last unrolled part.
2535 void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart) {
2536 assert(LastPart->getScalarType() == getScalarType() &&
2537 "last-part value must match the induction recipe's scalar type");
2539 ? SplatVFStep->getScalarType()->isIntegerTy()
2540 : SplatVFStep->getScalarType() == getScalarType()) &&
2541 "splat-step must match the induction type for non-pointer "
2542 "inductions, or be an integer index for pointer inductions");
2543 VPUser::addOperand(SplatVFStep);
2544 VPUser::addOperand(LastPart);
2545 }
2546
2547 static inline bool classof(const VPRecipeBase *R) {
2548 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2549 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2550 }
2551
2552 static inline bool classof(const VPValue *V) {
2553 auto *R = V->getDefiningRecipe();
2554 return R && classof(R);
2555 }
2556
2557 static inline bool classof(const VPSingleDefRecipe *R) {
2558 return classof(static_cast<const VPRecipeBase *>(R));
2559 }
2560
2561 void execute(VPTransformState &State) override = 0;
2562
2563 /// Returns the start value of the induction.
2565
2566 /// Returns the step value of the induction.
2568 const VPValue *getStepValue() const { return getOperand(1); }
2569
2570 /// Update the step value of the recipe.
2571 void setStepValue(VPValue *V) { setOperand(1, V); }
2572
2574 const VPValue *getVFValue() const { return getOperand(2); }
2575
2576 /// Returns the number of incoming values, also number of incoming blocks.
2577 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2578 /// incoming value, its start value.
2579 unsigned getNumIncoming() const override { return 1; }
2580
2581 /// Returns the underlying PHINode if one exists, or null otherwise.
2585
2586 /// Returns the induction descriptor for the recipe.
2587 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2588
2589 /// Returns the SCEV predicates associated with this induction.
2591 return IndDesc.getNoWrapPredicates();
2592 }
2593
2595 // TODO: All operands of base recipe must exist and be at same index in
2596 // derived recipe.
2598 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2599 }
2600
2602 // TODO: All operands of base recipe must exist and be at same index in
2603 // derived recipe.
2605 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2606 }
2607
2608 /// Returns true if the recipe only uses the first lane of operand \p Op.
2609 bool usesFirstLaneOnly(const VPValue *Op) const override {
2611 "Op must be an operand of the recipe");
2612 // The recipe creates its own wide start value, so it only requests the
2613 // first lane of the operand.
2614 // TODO: Remove once creating the start value is modeled separately.
2615 return Op == getStartValue() || Op == getStepValue();
2616 }
2617};
2618
2619/// A recipe for handling phi nodes of integer and floating-point inductions,
2620/// producing their vector values. This is an abstract recipe and must be
2621/// converted to concrete recipes before executing.
2623 public VPIRFlags {
2624 TruncInst *Trunc;
2625
2626 // If this recipe is unrolled it will have 2 additional operands.
2627 bool isUnrolled() const { return getNumOperands() == 5; }
2628
2629public:
2631 VPValue *VF, const InductionDescriptor &IndDesc,
2632 const VPIRFlags &Flags, DebugLoc DL)
2633 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2634 Start, Step, IndDesc, DL),
2635 VPIRFlags(Flags), Trunc(nullptr) {
2636 addOperand(VF);
2637 }
2638
2640 VPValue *VF, const InductionDescriptor &IndDesc,
2641 TruncInst *Trunc, const VPIRFlags &Flags,
2642 DebugLoc DL)
2643 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2644 Start, Step, IndDesc,
2645 Trunc ? Trunc->getType() : Start->getType(), DL),
2646 VPIRFlags(Flags), Trunc(Trunc) {
2647 addOperand(VF);
2649 if (Trunc)
2651 assert(Metadata.empty() && "unexpected metadata on Trunc");
2652 }
2653
2655
2661
2662 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2663
2664 void execute(VPTransformState &State) override {
2665 llvm_unreachable("cannot execute this recipe, should be expanded via "
2666 "expandVPWidenIntOrFpInductionRecipe");
2667 }
2668
2669 /// Returns the start value of the induction.
2671
2672 /// If the recipe has been unrolled, return the VPValue for the induction
2673 /// increment, otherwise return null.
2675 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2676 }
2677
2678 /// Returns the number of incoming values, also number of incoming blocks.
2679 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2680 /// incoming value, its start value.
2681 unsigned getNumIncoming() const override { return 1; }
2682
2683 /// Returns the first defined value as TruncInst, if it is one or nullptr
2684 /// otherwise.
2685 TruncInst *getTruncInst() { return Trunc; }
2686 const TruncInst *getTruncInst() const { return Trunc; }
2687
2688 /// Returns true if the induction is canonical, i.e. starting at 0 and
2689 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2690 /// same type as the canonical induction.
2691 bool isCanonical() const;
2692
2693 /// Returns the VPValue representing the value of this induction at
2694 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2695 /// take place.
2697 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2698 }
2699
2700protected:
2701#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2702 /// Print the recipe.
2703 void printRecipe(raw_ostream &O, const Twine &Indent,
2704 VPSlotTracker &SlotTracker) const override;
2705#endif
2706};
2707
2709public:
2710 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2711 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2712 /// VF*UF.
2714 VPValue *NumUnrolledElems,
2715 const InductionDescriptor &IndDesc, DebugLoc DL)
2716 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2717 Start, Step, IndDesc, DL) {
2718 addOperand(NumUnrolledElems);
2719 }
2720
2722
2728
2729 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2730
2731 /// Generate vector values for the pointer induction.
2732 void execute(VPTransformState &State) override {
2733 llvm_unreachable("cannot execute this recipe, should be expanded via "
2734 "expandVPWidenPointerInduction");
2735 };
2736
2737 /// Returns true if only scalar values will be generated.
2738 bool onlyScalarsGenerated(bool IsScalable);
2739
2740protected:
2741#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2742 /// Print the recipe.
2743 void printRecipe(raw_ostream &O, const Twine &Indent,
2744 VPSlotTracker &SlotTracker) const override;
2745#endif
2746};
2747
2748/// A recipe for widened phis. Incoming values are operands of the recipe and
2749/// their operand index corresponds to the incoming predecessor block. If the
2750/// recipe is placed in an entry block to a (non-replicate) region, it must have
2751/// exactly 2 incoming values, the first from the predecessor of the region and
2752/// the second from the exiting block of the region.
2754 public VPPhiAccessors {
2755 /// Name to use for the generated IR instruction for the widened phi.
2756 std::string Name;
2757
2758public:
2759 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2760 /// debug location \p DL and \p Name.
2762 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2763 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues,
2764 IncomingValues[0]->getScalarType(),
2765 /*UV=*/nullptr, DL),
2766 Name(Name.str()) {
2767 assert(all_of(IncomingValues,
2768 [this](VPValue *VPV) {
2769 return VPV->getScalarType() == getScalarType();
2770 }) &&
2771 "all incoming values must have the same type");
2772 }
2773
2775 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2776 }
2777
2778 ~VPWidenPHIRecipe() override = default;
2779
2780 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2781
2782 /// Generate the phi/select nodes.
2783 void execute(VPTransformState &State) override;
2784
2785 /// Return the cost of this VPWidenPHIRecipe.
2787 VPCostContext &Ctx) const override;
2788
2789protected:
2790#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2791 /// Print the recipe.
2792 void printRecipe(raw_ostream &O, const Twine &Indent,
2793 VPSlotTracker &SlotTracker) const override;
2794#endif
2795
2796 const VPRecipeBase *getAsRecipe() const override { return this; }
2797};
2798
2799/// A recipe for handling first-order recurrence phis. The start value is the
2800/// first operand of the recipe and the incoming value from the backedge is the
2801/// second operand.
2804 VPValue &BackedgeValue)
2805 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2806 &Start) {
2807 addOperand(&BackedgeValue);
2808 }
2809
2810 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2811
2816
2817 void execute(VPTransformState &State) override;
2818
2819 /// Return the cost of this first-order recurrence phi recipe.
2821 VPCostContext &Ctx) const override;
2822
2823 /// Returns true if the recipe only uses the first lane of operand \p Op.
2824 bool usesFirstLaneOnly(const VPValue *Op) const override {
2826 "Op must be an operand of the recipe");
2827 return Op == getStartValue();
2828 }
2829
2830protected:
2831#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2832 /// Print the recipe.
2833 void printRecipe(raw_ostream &O, const Twine &Indent,
2834 VPSlotTracker &SlotTracker) const override;
2835#endif
2836};
2837
2838/// Possible variants of a reduction.
2839
2840/// This reduction is ordered and in-loop.
struct RdxOrdered {};
/// This reduction is in-loop.
struct RdxInLoop {};
/// This reduction is unordered with the partial result scaled down by some
/// factor.
struct RdxUnordered {
  /// Factor by which the VF of the partial result is scaled down.
  unsigned VFScaleFactor;
};
/// The style of a reduction: exactly one of the variants above.
using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;

/// Translate the (\p InLoop, \p Ordered, \p ScaleFactor) description of a
/// reduction into a ReductionStyle.
///
/// \p Ordered takes precedence over \p InLoop, and an ordered reduction must
/// also be in-loop. \p ScaleFactor is only recorded for reductions that are
/// neither ordered nor in-loop.
inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
                                        unsigned ScaleFactor) {
  assert((!Ordered || InLoop) && "Ordered implies in-loop");
  if (Ordered)
    return RdxOrdered{};
  if (InLoop)
    return RdxInLoop{};
  return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
}
2860
2861/// A recipe for handling reduction phis. The start value is the first operand
2862/// of the recipe and the incoming value from the backedge is the second
2863/// operand.
2865 /// The recurrence kind of the reduction.
2866 const RecurKind Kind;
2867
2868 ReductionStyle Style;
2869
2870 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2871 /// patterns for argmin/argmax).
2872 /// TODO: Also support cases where the phi itself has a single use, but its
2873 /// compare has multiple uses.
2874 bool HasUsesOutsideReductionChain;
2875
2876public:
2877 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2879 VPValue &BackedgeValue, ReductionStyle Style,
2880 const VPIRFlags &Flags,
2881 bool HasUsesOutsideReductionChain = false)
2882 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2883 VPIRFlags(Flags), Kind(Kind), Style(Style),
2884 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2885 addOperand(&BackedgeValue);
2886 }
2887
2888 ~VPReductionPHIRecipe() override = default;
2889
2891 VPValue *BackedgeValue) {
2892 return new VPReductionPHIRecipe(
2894 *Start, *BackedgeValue, Style, *this, HasUsesOutsideReductionChain);
2895 }
2896
2900
2901 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2902
2903 /// Generate the phi/select nodes.
2904 void execute(VPTransformState &State) override;
2905
2906 /// Get the factor that the VF of this recipe's output should be scaled by, or
2907 /// 1 if it isn't scaled.
2908 unsigned getVFScaleFactor() const {
2909 auto *Partial = std::get_if<RdxUnordered>(&Style);
2910 return Partial ? Partial->VFScaleFactor : 1;
2911 }
2912
2913 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2914 /// > 1.
2915 void setVFScaleFactor(unsigned ScaleFactor) {
2916 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2917 Style = RdxUnordered{ScaleFactor};
2918 }
2919
2920 /// Returns the recurrence kind of the reduction.
2921 RecurKind getRecurrenceKind() const { return Kind; }
2922
2923 /// Returns true, if the phi is part of an ordered reduction.
2924 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2925
2926 /// Returns true if the phi is part of an in-loop reduction.
2927 bool isInLoop() const {
2928 return std::holds_alternative<RdxInLoop>(Style) ||
2929 std::holds_alternative<RdxOrdered>(Style);
2930 }
2931
2932 /// Returns true if the reduction outputs a vector with a scaled down VF.
2933 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2934
2935 /// Returns true, if the phi is part of a multi-use reduction.
2937 return HasUsesOutsideReductionChain;
2938 }
2939
2940 /// Returns true if the recipe only uses the first lane of operand \p Op.
2941 bool usesFirstLaneOnly(const VPValue *Op) const override {
2943 "Op must be an operand of the recipe");
2944 return isOrdered() || isInLoop();
2945 }
2946
2947protected:
2948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2949 /// Print the recipe.
2950 void printRecipe(raw_ostream &O, const Twine &Indent,
2951 VPSlotTracker &SlotTracker) const override;
2952#endif
2953};
2954
2955/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2956/// instructions.
2958public:
2959 /// The blend operation is a User of the incoming values and of their
2960 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2961 /// be omitted (implied by passing an odd number of operands) in which case
2962 /// all other incoming values are merged into it.
2964 const VPIRFlags &Flags, DebugLoc DL)
2965 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands,
2966 Operands[0]->getScalarType(), Flags, DL) {
2967 assert(Operands.size() >= 2 && "Expected at least two operands!");
2969 [this](unsigned I) {
2970 return getIncomingValue(I)->getScalarType() ==
2971 getScalarType();
2972 }) &&
2973 "all incoming values must have the same type");
2975 [this](unsigned I) {
2976 return getMask(I)->getScalarType()->isIntegerTy(1);
2977 }) &&
2978 "masks must be a bool");
2979 setUnderlyingValue(Phi);
2980 }
2981
2983
2986 NewOperands, *this, getDebugLoc());
2987 }
2988
2989 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2990
2991 /// A normalized blend is one that has an odd number of operands, whereby the
2992 /// first operand does not have an associated mask.
2993 bool isNormalized() const { return getNumOperands() % 2; }
2994
2995 /// Return the number of incoming values, taking into account when normalized
2996 /// the first incoming value will have no mask.
2997 unsigned getNumIncomingValues() const {
2998 return (getNumOperands() + isNormalized()) / 2;
2999 }
3000
3001 /// Return incoming value number \p Idx.
3002 VPValue *getIncomingValue(unsigned Idx) const {
3003 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
3004 }
3005
3006 /// Return mask number \p Idx.
3007 VPValue *getMask(unsigned Idx) const {
3008 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3009 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
3010 }
3011
3012 /// Set mask number \p Idx to \p V.
3013 void setMask(unsigned Idx, VPValue *V) {
3014 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3015 assert(V->getScalarType()->isIntegerTy(1) && "Mask must be an i1 (vector)");
3016 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
3017 }
3018
3019 void execute(VPTransformState &State) override {
3020 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
3021 }
3022
3023 /// Return the cost of this VPBlendRecipe.
3024 InstructionCost computeCost(ElementCount VF,
3025 VPCostContext &Ctx) const override;
3026
3027 /// Returns true if the recipe only uses the first lane of operand \p Op.
3028 bool usesFirstLaneOnly(const VPValue *Op) const override;
3029
3030protected:
3031#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3032 /// Print the recipe.
3033 void printRecipe(raw_ostream &O, const Twine &Indent,
3034 VPSlotTracker &SlotTracker) const override;
3035#endif
3036};
3037
3038/// A common base class for interleaved memory operations.
3039/// An Interleaved memory operation is a memory access method that combines
3040/// multiple strided loads/stores into a single wide load/store with shuffles.
3041/// The first operand is the start address. The optional operands are, in order,
3042/// the stored values and the mask.
3044 public VPIRMetadata {
3046
3047 /// Indicates if the interleave group is in a conditional block and requires a
3048 /// mask.
3049 bool HasMask = false;
3050
3051 /// Indicates if gaps between members of the group need to be masked out or if
3052 /// unused gaps can be loaded speculatively.
3053 bool NeedsMaskForGaps = false;
3054
3055protected:
3056 VPInterleaveBase(const unsigned char SC,
3058 ArrayRef<VPValue *> Operands,
3059 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3060 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3061 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
3062 NeedsMaskForGaps(NeedsMaskForGaps) {
3063 // TODO: extend the masked interleaved-group support to reversed access.
3064 assert((!Mask || !IG->isReverse()) &&
3065 "Reversed masked interleave-group not supported.");
3066 if (StoredValues.empty()) {
3067 for (Instruction *Inst : IG->members()) {
3068 assert(!Inst->getType()->isVoidTy() && "must have result");
3069 new VPMultiDefValue(this, Inst, Inst->getType());
3070 }
3071 } else {
3072 for (auto *SV : StoredValues)
3073 addOperand(SV);
3074 }
3075 if (Mask) {
3076 HasMask = true;
3077 addOperand(Mask);
3078 }
3079 }
3080
3081public:
3082 VPInterleaveBase *clone() override = 0;
3083
3084 static inline bool classof(const VPRecipeBase *R) {
3085 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
3086 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
3087 }
3088
3089 static inline bool classof(const VPUser *U) {
3090 auto *R = dyn_cast<VPRecipeBase>(U);
3091 return R && classof(R);
3092 }
3093
3094 /// Return the address accessed by this recipe.
3095 VPValue *getAddr() const {
3096 return getOperand(0); // Address is the 1st, mandatory operand.
3097 }
3098
3099 /// Return the mask used by this recipe. Note that a full mask is represented
3100 /// by a nullptr.
3101 VPValue *getMask() const {
3102 // Mask is optional and the last operand.
3103 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
3104 }
3105
3106 /// Return true if the access needs a mask because of the gaps.
3107 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
3108
3110
3111 Instruction *getInsertPos() const { return IG->getInsertPos(); }
3112
3113 void execute(VPTransformState &State) override {
3114 llvm_unreachable("VPInterleaveBase should not be instantiated.");
3115 }
3116
3117 /// Return the cost of this recipe.
3118 InstructionCost computeCost(ElementCount VF,
3119 VPCostContext &Ctx) const override;
3120
3121 /// Returns true if the recipe only uses the first lane of operand \p Op.
3122 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
3123
3124 /// Returns the number of stored operands of this interleave group. Returns 0
3125 /// for load interleave groups.
3126 virtual unsigned getNumStoreOperands() const = 0;
3127
3128 /// Return the VPValues stored by this interleave group. If it is a load
3129 /// interleave group, return an empty ArrayRef.
3131 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
3133 }
3134};
3135
3136/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
3137/// or stores into one wide load/store and shuffles. The first operand of a
3138/// VPInterleave recipe is the address, followed by the stored values, followed
3139/// by an optional mask.
3141public:
3143 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3144 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3145 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
3146 Mask, NeedsMaskForGaps, MD, DL) {}
3147
3148 ~VPInterleaveRecipe() override = default;
3149
3153 needsMaskForGaps(), *this, getDebugLoc());
3154 }
3155
3156 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
3157
3158 /// Generate the wide load or store, and shuffles.
3159 void execute(VPTransformState &State) override;
3160
3161 bool usesFirstLaneOnly(const VPValue *Op) const override {
3163 "Op must be an operand of the recipe");
3164 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
3165 }
3166
3167 unsigned getNumStoreOperands() const override {
3168 return getNumOperands() - (getMask() ? 2 : 1);
3169 }
3170
3171protected:
3172#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3173 /// Print the recipe.
3174 void printRecipe(raw_ostream &O, const Twine &Indent,
3175 VPSlotTracker &SlotTracker) const override;
3176#endif
3177};
3178
3179/// A recipe for interleaved memory operations with vector-predication
3180/// intrinsics. The first operand is the address, the second operand is the
3181/// explicit vector length. Stored values and mask are optional operands.
3183public:
3185 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3186 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3187 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3188 R.getDebugLoc()) {
3189 assert(!getInterleaveGroup()->isReverse() &&
3190 "Reversed interleave-group with tail folding is not supported.");
3191 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3192 "supported for scalable vector.");
3193 }
3194
3195 ~VPInterleaveEVLRecipe() override = default;
3196
3198 llvm_unreachable("cloning not implemented yet");
3199 }
3200
3201 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3202
3203 /// The VPValue of the explicit vector length.
3204 VPValue *getEVL() const { return getOperand(1); }
3205
3206 /// Generate the wide load or store, and shuffles.
3207 void execute(VPTransformState &State) override;
3208
3209 /// The recipe only uses the first lane of the address, and EVL operand.
3210 bool usesFirstLaneOnly(const VPValue *Op) const override {
3212 "Op must be an operand of the recipe");
3213 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3214 Op == getEVL();
3215 }
3216
3217 unsigned getNumStoreOperands() const override {
3218 return getNumOperands() - (getMask() ? 3 : 2);
3219 }
3220
3221protected:
3222#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3223 /// Print the recipe.
3224 void printRecipe(raw_ostream &O, const Twine &Indent,
3225 VPSlotTracker &SlotTracker) const override;
3226#endif
3227};
3228
3229/// A recipe to represent inloop, ordered or partial reduction operations. It
3230/// performs a reduction on a vector operand into a scalar (vector in the case
3231/// of a partial reduction) value, and adds the result to a chain. The Operands
3232/// are {ChainOp, VecOp, [Condition]}.
3234
3235 /// The recurrence kind for the reduction in question.
3236 RecurKind RdxKind;
3237 /// Whether the reduction is conditional.
3238 bool IsConditional = false;
3239 ReductionStyle Style;
3240
3241protected:
3242 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3244 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3245 ReductionStyle Style, DebugLoc DL)
3246 : VPRecipeWithIRFlags(SC, Operands, Operands[0]->getScalarType(), FMFs,
3247 DL),
3248 RdxKind(RdxKind), Style(Style) {
3249 assert(all_of(Operands,
3250 [this](VPValue *VPV) {
3251 return VPV->getScalarType() == getScalarType() ||
3252 (isa<VPInstruction>(VPV) &&
3253 cast<VPInstruction>(VPV)->getOpcode() ==
3255 }) &&
3256 "all incoming values must have the same type");
3257 if (CondOp) {
3258 assert(CondOp->getScalarType()->isIntegerTy(1) &&
3259 "CondOp must be a bool");
3260 IsConditional = true;
3261 addOperand(CondOp);
3262 }
3264 }
3265
3266public:
3268 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3270 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3271 {ChainOp, VecOp}, CondOp, Style, DL) {}
3272
3274 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3276 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3277 {ChainOp, VecOp}, CondOp, Style, DL) {}
3278
3279 ~VPReductionRecipe() override = default;
3280
3282 return new VPReductionRecipe(RdxKind, getFastMathFlagsOrNone(),
3284 getCondOp(), Style, getDebugLoc());
3285 }
3286
3287 static inline bool classof(const VPRecipeBase *R) {
3288 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3289 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3290 }
3291
3292 static inline bool classof(const VPUser *U) {
3293 auto *R = dyn_cast<VPRecipeBase>(U);
3294 return R && classof(R);
3295 }
3296
3297 static inline bool classof(const VPValue *VPV) {
3298 const VPRecipeBase *R = VPV->getDefiningRecipe();
3299 return R && classof(R);
3300 }
3301
3302 static inline bool classof(const VPSingleDefRecipe *R) {
3303 return classof(static_cast<const VPRecipeBase *>(R));
3304 }
3305
3306 /// Generate the reduction in the loop.
3307 void execute(VPTransformState &State) override;
3308
3309 /// Return the cost of VPReductionRecipe.
3310 InstructionCost computeCost(ElementCount VF,
3311 VPCostContext &Ctx) const override;
3312
3313 /// Return the recurrence kind for the in-loop reduction.
3314 RecurKind getRecurrenceKind() const { return RdxKind; }
3315 /// Return true if the in-loop reduction is ordered.
3316 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3317 /// Return true if the in-loop reduction is conditional.
3318 bool isConditional() const { return IsConditional; };
3319 /// Returns true if the reduction outputs a vector with a scaled down VF.
3320 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3321 /// Returns true if the reduction is in-loop.
3322 bool isInLoop() const {
3323 return std::holds_alternative<RdxInLoop>(Style) ||
3324 std::holds_alternative<RdxOrdered>(Style);
3325 }
3326 /// The VPValue of the scalar Chain being accumulated.
3327 VPValue *getChainOp() const { return getOperand(0); }
3328 /// The VPValue of the vector value to be reduced.
3329 VPValue *getVecOp() const { return getOperand(1); }
3330 /// The VPValue of the condition for the block.
3332 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3333 }
3334 /// Get the factor that the VF of this recipe's output should be scaled by, or
3335 /// 1 if it isn't scaled.
3336 unsigned getVFScaleFactor() const {
3337 auto *Partial = std::get_if<RdxUnordered>(&Style);
3338 return Partial ? Partial->VFScaleFactor : 1;
3339 }
3340
3341protected:
3342#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3343 /// Print the recipe.
3344 void printRecipe(raw_ostream &O, const Twine &Indent,
3345 VPSlotTracker &SlotTracker) const override;
3346#endif
3347};
3348
3349/// A recipe to represent inloop reduction operations with vector-predication
3350/// intrinsics, performing a reduction on a vector operand with the explicit
3351/// vector length (EVL) into a scalar value, and adding the result to a chain.
3352/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3354public:
3357 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3360 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3361 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3362 DL) {}
3363
3364 ~VPReductionEVLRecipe() override = default;
3365
3367 llvm_unreachable("cloning not implemented yet");
3368 }
3369
3370 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3371
3372 /// Generate the reduction in the loop
3373 void execute(VPTransformState &State) override;
3374
3375 /// The VPValue of the explicit vector length.
3376 VPValue *getEVL() const { return getOperand(2); }
3377
3378 /// Returns true if the recipe only uses the first lane of operand \p Op.
3379 bool usesFirstLaneOnly(const VPValue *Op) const override {
3381 "Op must be an operand of the recipe");
3382 return Op == getEVL();
3383 }
3384
3385protected:
3386#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3387 /// Print the recipe.
3388 void printRecipe(raw_ostream &O, const Twine &Indent,
3389 VPSlotTracker &SlotTracker) const override;
3390#endif
3391};
3392
3393/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3394/// copies of the original scalar type, one per lane, instead of producing a
3395/// single copy of widened type for all lanes. If the instruction is known to be
3396/// a single scalar, only one copy will be generated.
3398 public VPIRMetadata {
3399 /// Indicator if only a single replica is needed, rather than one per lane.
3400 bool IsSingleScalar;
3401
3402 /// Indicator if the replicas are also predicated.
3403 bool IsPredicated;
3404
3405public:
3407 bool IsSingleScalar, VPValue *Mask = nullptr,
3408 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3409 DebugLoc DL = DebugLoc::getUnknown())
3410 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands,
3411 computeScalarType(I, Operands), Flags, DL),
3412 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3413 IsPredicated(Mask) {
3414 assert((!IsSingleScalar || !I->isCast()) &&
3415 "single-scalar casts should use VPInstructionWithType");
3416 setUnderlyingValue(I);
3417 if (Mask)
3418 addOperand(Mask);
3419 }
3420
3421 ~VPReplicateRecipe() override = default;
3422
3423 /// Compute the scalar result type for a VPReplicateRecipe wrapping \p I with
3424 /// \p Operands (excluding any predicate mask).
3425 static Type *computeScalarType(const Instruction *I,
3426 ArrayRef<VPValue *> Operands);
3427
3429
3431 auto *Copy = new VPReplicateRecipe(
3432 getUnderlyingInstr(), NewOperands, IsSingleScalar,
3433 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3434 Copy->transferFlags(*this);
3435 return Copy;
3436 }
3437
3438 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3439
3440 /// Generate replicas of the desired Ingredient. Replicas will be generated
3441 /// for all parts and lanes unless a specific part and lane are specified in
3442 /// the \p State.
3443 void execute(VPTransformState &State) override;
3444
3445 /// Return the cost of this VPReplicateRecipe.
3446 InstructionCost computeCost(ElementCount VF,
3447 VPCostContext &Ctx) const override;
3448
3449 /// Return the cost of scalarizing a call to \p CalledFn with argument
3450 /// operands \p ArgOps for a given \p VF.
3451 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
3453 bool IsSingleScalar, ElementCount VF,
3454 VPCostContext &Ctx);
3455
3456 bool isSingleScalar() const { return IsSingleScalar; }
3457
3458 bool isPredicated() const { return IsPredicated; }
3459
3460 /// Returns true if the recipe only uses the first lane of operand \p Op.
3461 bool usesFirstLaneOnly(const VPValue *Op) const override {
3463 "Op must be an operand of the recipe");
3464 return isSingleScalar();
3465 }
3466
3467 /// Returns true if the recipe uses scalars of operand \p Op.
3468 bool usesScalars(const VPValue *Op) const override {
3470 "Op must be an operand of the recipe");
3471 return true;
3472 }
3473
3474 /// Return the mask of a predicated VPReplicateRecipe.
3476 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3477 return getOperand(getNumOperands() - 1);
3478 }
3479
3480 /// Return the recipe's operands, excluding the mask of a predicated recipe.
3484
3485 /// Returns the number of operands, excluding the mask if the recipe is
3486 /// predicated.
3487 unsigned getNumOperandsWithoutMask() const {
3488 return getNumOperands() - isPredicated();
3489 }
3490
3491 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3492
3493protected:
3494#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3495 /// Print the recipe.
3496 void printRecipe(raw_ostream &O, const Twine &Indent,
3497 VPSlotTracker &SlotTracker) const override;
3498#endif
3499};
3500
3501/// A recipe for generating conditional branches on the bits of a mask.
3503public:
3505 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3506
3509 }
3510
3511 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3512
3513 /// Generate the extraction of the appropriate bit from the block mask and the
3514 /// conditional branch.
3515 void execute(VPTransformState &State) override;
3516
3517 /// Return the cost of this VPBranchOnMaskRecipe.
3518 InstructionCost computeCost(ElementCount VF,
3519 VPCostContext &Ctx) const override;
3520
3521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3522 /// Print the recipe.
3523 void printRecipe(raw_ostream &O, const Twine &Indent,
3524 VPSlotTracker &SlotTracker) const override {
3525 O << Indent << "BRANCH-ON-MASK ";
3527 }
3528#endif
3529
3530 /// Returns true if the recipe uses scalars of operand \p Op.
3531 bool usesScalars(const VPValue *Op) const override {
3533 "Op must be an operand of the recipe");
3534 return true;
3535 }
3536};
3537
3538/// A recipe to combine multiple recipes into a single 'expression' recipe,
3539/// which should be considered a single entity for cost-modeling and transforms.
3540/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3541/// expression recipes, before execute. The individual expression recipes are
3542/// completely disconnected from the def-use graph of other recipes not part of
3543/// the expression. Def-use edges between pairs of expression recipes remain
3544/// intact, whereas every edge between an expression recipe and a recipe outside
3545/// the expression is elevated to connect the non-expression recipe with the
3546/// VPExpressionRecipe itself.
3547class VPExpressionRecipe : public VPSingleDefRecipe {
3548 /// Recipes included in this VPExpressionRecipe. This could contain
3549 /// duplicates.
3550 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3551
3552 /// Temporary VPValues used for external operands of the expression, i.e.
3553 /// operands not defined by recipes in the expression.
3554 SmallVector<VPValue *> LiveInPlaceholders;
3555
3556 enum class ExpressionTypes {
3557 /// Represents an inloop extended reduction operation, performing a
3558 /// reduction on an extended vector operand into a scalar value, and adding
3559 /// the result to a chain.
3560 ExtendedReduction,
3561 /// Represents an inloop extended reduction operation, which is negated,
3562 /// then reduced before adding the result to a chain.
3563 NegatedExtendedReduction,
3564 /// Represent an inloop multiply-accumulate reduction, multiplying the
3565 /// extended vector operands, performing a reduction.add on the result, and
3566 /// adding the scalar result to a chain.
3567 ExtMulAccReduction,
3568 /// Represent an inloop multiply-accumulate reduction, multiplying the
3569 /// vector operands, performing a reduction.add on the result, and adding
3570 /// the scalar result to a chain.
3571 MulAccReduction,
3572 /// Represent an inloop multiply-accumulate reduction, multiplying the
3573 /// extended vector operands, negating the multiplication, performing a
3574 /// reduction.add on the result, and adding the scalar result to a chain.
3575 ExtNegatedMulAccReduction,
3576 };
3577
3578 /// Type of the expression.
3579 ExpressionTypes ExpressionType;
3580
3581 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3582 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3583 /// in the expression) are replaced by temporary VPValues and the original
3584 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3585 /// as needed (excluding last) to ensure they are only used by other recipes
3586 /// in the expression.
3587 VPExpressionRecipe(ExpressionTypes ExpressionType,
3588 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3589
3590public:
3592 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3594 VPReductionRecipe *Red)
3595 : VPExpressionRecipe(ExpressionTypes::NegatedExtendedReduction,
3596 {Ext, Neg, Red}) {
3597 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3598 Red->getRecurrenceKind() == RecurKind::FAdd ||
3599 Red->getRecurrenceKind() == RecurKind::AddChainWithSubs) &&
3600 "Expected an add or add-chain-with-subs reduction");
3601 if (Neg->getOpcode() == Instruction::Sub) {
3602 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(1));
3603 assert(SubConst && SubConst->isZero() && "Expected a negating sub");
3604 } else
3605 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3606 }
3608 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3611 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3612 {Ext0, Ext1, Mul, Red}) {}
3615 VPReductionRecipe *Red)
3616 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3617 {Ext0, Ext1, Mul, Neg, Red}) {
3618 assert((Mul->getOpcode() == Instruction::Mul ||
3619 Mul->getOpcode() == Instruction::FMul) &&
3620 "Expected a mul");
3621 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3622 Red->getRecurrenceKind() == RecurKind::FAdd ||
3623 Red->getRecurrenceKind() == RecurKind::AddChainWithSubs) &&
3624 "Expected an add or add-chain-with-subs reduction");
3625 assert(getNumOperands() >= 3 && "Expected at least three operands");
3626 if (Neg->getOpcode() == Instruction::Sub) {
3627 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3628 assert(SubConst && SubConst->isZero() &&
3629 Neg->getOpcode() == Instruction::Sub && "Expected a negating sub");
3630 } else
3631 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3632 }
3633
3635 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3636 for (auto *R : reverse(ExpressionRecipes)) {
3637 if (ExpressionRecipesSeen.insert(R).second)
3638 delete R;
3639 }
3640 for (VPValue *T : LiveInPlaceholders)
3641 delete T;
3642 }
3643
3644 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3645
3646 VPExpressionRecipe *clone() override {
3647 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3648 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3649 for (auto *R : ExpressionRecipes)
3650 NewExpressiondRecipes.push_back(R->clone());
3651 for (auto *New : NewExpressiondRecipes) {
3652 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3653 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3654 // Update placeholder operands in the cloned recipe to use the external
3655 // operands, to be internalized when the cloned expression is constructed.
3656 for (const auto &[Placeholder, OutsideOp] :
3657 zip(LiveInPlaceholders, operands()))
3658 New->replaceUsesOfWith(Placeholder, OutsideOp);
3659 }
3660 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3661 }
3662
3663 /// Return the VPValue to use to infer the result type of the recipe.
3665 unsigned OpIdx =
3666 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3667 : 1;
3668 return getOperand(getNumOperands() - OpIdx);
3669 }
3670
3671 /// Insert the recipes of the expression back into the VPlan, directly before
3672 /// the current recipe. Leaves the expression recipe empty, which must be
3673 /// removed before codegen.
3674 void decompose();
3675
3676 unsigned getVFScaleFactor() const {
3677 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3678 return PR ? PR->getVFScaleFactor() : 1;
3679 }
3680
3681 /// Method for generating code, must not be called as this recipe is abstract.
3682 void execute(VPTransformState &State) override {
3683 llvm_unreachable("recipe must be removed before execute");
3684 }
3685
3687 VPCostContext &Ctx) const override;
3688
3689 /// Returns true if this expression contains recipes that may read from or
3690 /// write to memory.
3691 bool mayReadOrWriteMemory() const;
3692
3693 /// Returns true if this expression contains recipes that may have side
3694 /// effects.
3695 bool mayHaveSideEffects() const;
3696
3697 /// Returns true if this VPExpressionRecipe produces a single scalar.
3698 bool isVectorToScalar() const;
3699
3700protected:
3701#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3702 /// Print the recipe.
3703 void printRecipe(raw_ostream &O, const Twine &Indent,
3704 VPSlotTracker &SlotTracker) const override;
3705#endif
3706};
3707
3708/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3709/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3710/// order to merge values that are set under such a branch and feed their uses.
3711/// The phi nodes can be scalar or vector depending on the users of the value.
3712/// This recipe works in concert with VPBranchOnMaskRecipe.
3714public:
3715 /// Construct a VPPredInstPHIRecipe given \p PredV, whose value needs phi
3716 /// nodes after merging back from a Branch-on-Mask.
3718 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV,
3719 PredV->getScalarType(), /*UV=*/nullptr, DL) {}
3720 ~VPPredInstPHIRecipe() override = default;
3721
3723 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3724 }
3725
3726 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3727
3728 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3729 /// retain SSA form.
3730 void execute(VPTransformState &State) override;
3731
3732 /// Return the cost of this VPPredInstPHIRecipe.
3734 VPCostContext &Ctx) const override {
3735 // TODO: Compute accurate cost after retiring the legacy cost model.
3736 return 0;
3737 }
3738
3739protected:
3740#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3741 /// Print the recipe.
3742 void printRecipe(raw_ostream &O, const Twine &Indent,
3743 VPSlotTracker &SlotTracker) const override;
3744#endif
3745};
3746
3747/// A common mixin class for widening memory operations. An optional mask can be
3748/// provided as the last operand.
3750protected:
3752
3753 /// Alignment information for this memory access.
3755
3756 /// Whether the accessed addresses are consecutive.
3758
3759 /// Whether the memory access is masked.
3760 bool IsMasked = false;
3761
3762 void setMask(VPValue *Mask) {
3763 assert(!IsMasked && "cannot re-set mask");
3764 if (!Mask)
3765 return;
3766 assert(Mask->getScalarType()->isIntegerTy(1) &&
3767 "Mask must be an i1 (vector)");
3768 getAsRecipe()->addOperand(Mask);
3769 IsMasked = true;
3770 }
3771
3776
3777public:
3778 virtual ~VPWidenMemoryRecipe() = default;
3779
3780 /// Return a VPRecipeBase* to the current object.
3782 virtual const VPRecipeBase *getAsRecipe() const = 0;
3783
3784 /// Return whether the loaded-from / stored-to addresses are consecutive.
3785 bool isConsecutive() const { return Consecutive; }
3786
3787 /// Return the address accessed by this recipe.
3788 VPValue *getAddr() const { return getAsRecipe()->getOperand(0); }
3789
3790 /// Returns true if the recipe is masked.
3791 bool isMasked() const { return IsMasked; }
3792
3793 /// Return the mask used by this recipe. Note that a full mask is represented
3794 /// by a nullptr.
3795 VPValue *getMask() const {
3796 // Mask is optional and therefore the last operand.
3797 const VPRecipeBase *R = getAsRecipe();
3798 return isMasked() ? R->getOperand(R->getNumOperands() - 1) : nullptr;
3799 }
3800
3801 /// Returns the alignment of the memory access.
3802 Align getAlign() const { return Alignment; }
3803
3804 /// Return the cost of this VPWidenMemoryRecipe.
3805 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3806
3808};
3809
3810/// A recipe for widening load operations, using the address to load from and an
3811/// optional mask.
3813 public VPWidenMemoryRecipe {
3815 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3816 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, Load.getType(),
3817 &Load, DL),
3818 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3819 setMask(Mask);
3820 }
3821
3824 getMask(), Consecutive, *this, getDebugLoc());
3825 }
3826
3827 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3828
3829 /// Generate a wide load or gather.
3830 void execute(VPTransformState &State) override;
3831
3832 /// Return the cost of this VPWidenLoadRecipe.
3834 VPCostContext &Ctx) const override {
3835 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3836 }
3837
3838 /// Returns true if the recipe only uses the first lane of operand \p Op.
3839 bool usesFirstLaneOnly(const VPValue *Op) const override {
3841 "Op must be an operand of the recipe");
3842 // Widened, consecutive load operations only demand the first lane of
3843 // their address.
3844 return Op == getAddr() && isConsecutive();
3845 }
3846
3847protected:
3848 VPRecipeBase *getAsRecipe() override;
3849 const VPRecipeBase *getAsRecipe() const override;
3850
3851#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3852 /// Print the recipe.
3853 void printRecipe(raw_ostream &O, const Twine &Indent,
3854 VPSlotTracker &SlotTracker) const override;
3855#endif
3856};
3857
3858/// A recipe for widening load operations with vector-predication intrinsics,
3859/// using the address to load from, the explicit vector length and an optional
3860/// mask.
3862 : public VPSingleDefRecipe,
3863 public VPWidenMemoryRecipe {
3865 VPValue *Mask)
3866 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3867 L.getIngredient().getType(), &L.getIngredient(),
3868 L.getDebugLoc()),
3869 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3870 setMask(Mask);
3871 }
3872
3874 llvm_unreachable("cloning not supported");
3875 }
3876
3877 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3878
3879 /// Return the EVL operand.
3880 VPValue *getEVL() const { return getOperand(1); }
3881
3882 /// Generate the wide load or gather.
3883 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3884
3885 /// Return the cost of this VPWidenLoadEVLRecipe.
3887 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3888
3889 /// Returns true if the recipe only uses the first lane of operand \p Op.
3890 bool usesFirstLaneOnly(const VPValue *Op) const override {
3892 "Op must be an operand of the recipe");
3893 // Widened loads only demand the first lane of EVL and consecutive loads
3894 // only demand the first lane of their address.
3895 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3896 }
3897
3898protected:
3899 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
3900 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
3901
3902#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3903 /// Print the recipe.
3904 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3905 VPSlotTracker &SlotTracker) const override;
3906#endif
3907};
3908
3909/// A recipe for widening store operations, using the stored value, the address
3910/// to store to and an optional mask.
3912 public VPWidenMemoryRecipe {
3913 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3914 VPValue *Mask, bool Consecutive,
3915 const VPIRMetadata &Metadata, DebugLoc DL)
3916 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3917 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3918 setMask(Mask);
3919 }
3920
3924 *this, getDebugLoc());
3925 }
3926
3927 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3928
3929 /// Return the value stored by this recipe.
3930 VPValue *getStoredValue() const { return getOperand(1); }
3931
3932 /// Generate a wide store or scatter.
3933 void execute(VPTransformState &State) override;
3934
3935 /// Return the cost of this VPWidenStoreRecipe.
3937 VPCostContext &Ctx) const override {
3938 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3939 }
3940
3941 /// Returns true if the recipe only uses the first lane of operand \p Op.
3942 bool usesFirstLaneOnly(const VPValue *Op) const override {
3944 "Op must be an operand of the recipe");
3945 // Widened, consecutive stores only demand the first lane of their address,
3946 // unless the same operand is also stored.
3947 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3948 }
3949
3950protected:
3951 VPRecipeBase *getAsRecipe() override;
3952 const VPRecipeBase *getAsRecipe() const override;
3953
3954#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3955 /// Print the recipe.
3956 void printRecipe(raw_ostream &O, const Twine &Indent,
3957 VPSlotTracker &SlotTracker) const override;
3958#endif
3959};
3960
3961/// A recipe for widening store operations with vector-predication intrinsics,
3962/// using the value to store, the address to store to, the explicit vector
3963/// length and an optional mask.
3965 : public VPRecipeBase,
3966 public VPWidenMemoryRecipe {
3968 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3969 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3970 S.getDebugLoc()),
3971 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3972 setMask(Mask);
3973 }
3974
3976 llvm_unreachable("cloning not supported");
3977 }
3978
3979 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3980
3981 /// Return the value stored by this recipe.
3982 VPValue *getStoredValue() const { return getOperand(1); }
3983
3984 /// Return the EVL operand.
3985 VPValue *getEVL() const { return getOperand(2); }
3986
3987 /// Generate the wide store or scatter.
3988 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3989
3990 /// Return the cost of this VPWidenStoreEVLRecipe.
3992 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3993
3994 /// Returns true if the recipe only uses the first lane of operand \p Op.
3995 bool usesFirstLaneOnly(const VPValue *Op) const override {
3997 "Op must be an operand of the recipe");
3998 if (Op == getEVL()) {
3999 assert(getStoredValue() != Op && "unexpected store of EVL");
4000 return true;
4001 }
4002 // Widened, consecutive memory operations only demand the first lane of
4003 // their address, unless the same operand is also stored. The latter can
4004 // happen with opaque pointers.
4005 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
4006 }
4007
4008protected:
4009 LLVM_ABI_FOR_TEST VPRecipeBase *getAsRecipe() override;
4010 LLVM_ABI_FOR_TEST const VPRecipeBase *getAsRecipe() const override;
4011
4012#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4013 /// Print the recipe.
4014 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4015 VPSlotTracker &SlotTracker) const override;
4016#endif
4017};
4018
4019/// Recipe to expand a SCEV expression.
4021 const SCEV *Expr;
4022
4023public:
4024 VPExpandSCEVRecipe(const SCEV *Expr);
4025
4026 ~VPExpandSCEVRecipe() override = default;
4027
4028 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
4029
4030 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
4031
4032 void execute(VPTransformState &State) override {
4033 llvm_unreachable("SCEV expressions must be expanded before final execute");
4034 }
4035
4036 /// Return the cost of this VPExpandSCEVRecipe.
4038 VPCostContext &Ctx) const override {
4039 // TODO: Compute accurate cost after retiring the legacy cost model.
4040 return 0;
4041 }
4042
4043 const SCEV *getSCEV() const { return Expr; }
4044
4045protected:
4046#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4047 /// Print the recipe.
4048 void printRecipe(raw_ostream &O, const Twine &Indent,
4049 VPSlotTracker &SlotTracker) const override;
4050#endif
4051};
4052
4053/// A recipe for generating the active lane mask for the vector loop that is
4054/// used to predicate the vector operations.
4056public:
4058 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
4059 StartMask, DL) {}
4060
4061 ~VPActiveLaneMaskPHIRecipe() override = default;
4062
4065 if (getNumOperands() == 2)
4066 R->addBackedgeValue(getOperand(1));
4067 return R;
4068 }
4069
4070 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
4071
4072 /// Generate the active lane mask phi of the vector loop.
4073 void execute(VPTransformState &State) override;
4074
4075protected:
4076#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4077 /// Print the recipe.
4078 void printRecipe(raw_ostream &O, const Twine &Indent,
4079 VPSlotTracker &SlotTracker) const override;
4080#endif
4081};
4082
4083/// A recipe for generating the phi node tracking the current scalar iteration
4084/// index. It starts at the start value of the canonical induction and gets
4085/// incremented by the number of scalar iterations processed by the vector loop
4086/// iteration. The increment does not have to be loop invariant.
4088public:
4090 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
4091 StartIV, DL) {}
4092
4093 ~VPCurrentIterationPHIRecipe() override = default;
4094
4096 llvm_unreachable("cloning not implemented yet");
4097 }
4098
4099 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
4100
4101 void execute(VPTransformState &State) override {
4102 llvm_unreachable("cannot execute this recipe, should be replaced by a "
4103 "scalar phi recipe");
4104 }
4105
4106 /// Return the cost of this VPCurrentIterationPHIRecipe.
4108 VPCostContext &Ctx) const override {
4109 // For now, match the behavior of the legacy cost model.
4110 return 0;
4111 }
4112
4113 /// Returns true if the recipe only uses the first lane of operand \p Op.
4114 bool usesFirstLaneOnly(const VPValue *Op) const override {
4116 "Op must be an operand of the recipe");
4117 return true;
4118 }
4119
4120protected:
4121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4122 /// Print the recipe.
4123 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4124 VPSlotTracker &SlotTracker) const override;
4125#endif
4126};
4127
4128/// A Recipe for widening the canonical induction variable of the vector loop.
4129/// First operand is the canonical IV recipe, a second step operand (VF * Part)
4130/// is added during unrolling.
4132public:
4134 const VPIRFlags::WrapFlagsTy &Flags = {false, false})
4135 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCanonicalIVSC, CanonicalIV,
4136 CanonicalIV->getType(), Flags) {}
4137
4138 ~VPWidenCanonicalIVRecipe() override = default;
4139
4141 auto *WideCanIV =
4143 if (VPValue *Step = getStepValue())
4144 WideCanIV->addPerPartStep(Step);
4145 return WideCanIV;
4146 }
4147
4148 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
4149
4150 void execute(VPTransformState &State) override {
4151 llvm_unreachable("Expected prior expansion of WidenCanonicalIV recipes");
4152 }
4153
4154 /// Return the cost of this VPWidenCanonicalIVRecipe.
4156 VPCostContext &Ctx) const override {
4157 // TODO: Compute accurate cost after retiring the legacy cost model.
4158 return 0;
4159 }
4160
4161 /// Return the canonical IV being widened.
4165
4167 return getNumOperands() == 2 ? getOperand(1) : nullptr;
4168 }
4169
4170 /// Add the per-part step (VF * Part) used for unrolled parts.
4172 assert(Step->getScalarType() == getScalarType() &&
4173 "per-part step must have the same type as the canonical IV");
4174 VPUser::addOperand(Step);
4175 }
4176
4177protected:
4178#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4179 /// Print the recipe.
4180 void printRecipe(raw_ostream &O, const Twine &Indent,
4181 VPSlotTracker &SlotTracker) const override;
4182#endif
4183};
4184
4185/// A recipe for converting the input value \p IV to the corresponding
4186/// value of an IV with different start and step values, using Start + IV *
4187/// Step.
4189 /// Kind of the induction.
4191 /// If not nullptr, the floating point induction binary operator. Must be set
4192 /// for floating point inductions.
4193 const FPMathOperator *FPBinOp;
4194
4195public:
4197 VPValue *CanonicalIV, VPValue *Step)
4199 IndDesc.getKind(),
4200 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
4201 Start, CanonicalIV, Step) {}
4202
4204 const FPMathOperator *FPBinOp, VPIRValue *Start,
4205 VPValue *IV, VPValue *Step)
4206 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step},
4207 Start->getScalarType(), nullptr),
4208 Kind(Kind), FPBinOp(FPBinOp) {}
4209
4210 ~VPDerivedIVRecipe() override = default;
4211
4213 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
4214 getStepValue());
4215 }
4216
4217 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4218
4219 void execute(VPTransformState &State) override {
4220 llvm_unreachable("Expected prior expansion of this recipe");
4221 }
4222
4223 /// Return the cost of this VPDerivedIVRecipe.
4225 VPCostContext &Ctx) const override;
4226
4228 VPValue *getIndex() const { return getOperand(1); }
4229 VPValue *getStepValue() const { return getOperand(2); }
4230 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
4232
4233 /// Returns true if the recipe only uses the first lane of operand \p Op.
4234 bool usesFirstLaneOnly(const VPValue *Op) const override {
4236 "Op must be an operand of the recipe");
4237 return true;
4238 }
4239
4240protected:
4241#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4242 /// Print the recipe.
4243 void printRecipe(raw_ostream &O, const Twine &Indent,
4244 VPSlotTracker &SlotTracker) const override;
4245#endif
4246};
4247
4248/// A recipe for handling phi nodes of integer and floating-point inductions,
4249/// producing their scalar values. Before unrolling by UF the recipe represents
4250/// the VF*UF scalar values to be produced, or UF scalar values if only first
4251/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4252/// operand StartIndex to all unroll parts except part 0, as the recipe
4253/// represents the VF scalar values (this number of values is taken from
4254/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4256 Instruction::BinaryOps InductionOpcode;
4257
4258public:
4261 DebugLoc DL)
4262 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4263 IV->getScalarType(), FMFs, DL),
4264 InductionOpcode(Opcode) {}
4265
4267 VPValue *Step, VPValue *VF,
4270 IV, Step, VF, IndDesc.getInductionOpcode(),
4271 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4272 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4273 : FastMathFlags(),
4274 DL) {}
4275
4276 ~VPScalarIVStepsRecipe() override = default;
4277
4279 auto *NewR = new VPScalarIVStepsRecipe(
4280 getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
4282 if (VPValue *StartIndex = getStartIndex())
4283 NewR->setStartIndex(StartIndex);
4284 return NewR;
4285 }
4286
4287 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4288
4289 /// Generate the scalarized versions of the phi node as needed by their users.
4290 void execute(VPTransformState &State) override;
4291
4292 /// Return the cost of this VPScalarIVStepsRecipe.
4293 InstructionCost computeCost(ElementCount VF,
4294 VPCostContext &Ctx) const override;
4295
4296 VPValue *getStepValue() const { return getOperand(1); }
4297
4298 /// Return the number of scalars to produce per unroll part, used to compute
4299 /// StartIndex during unrolling.
4300 VPValue *getVFValue() const { return getOperand(2); }
4301
4302 /// Return the StartIndex, or null if known to be zero, valid only after
4303 /// unrolling.
4305 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4306 }
4307
4308 /// Set or add the StartIndex operand: if the recipe already has four
/// operands, operand 3 is replaced by \p StartIndex; otherwise \p StartIndex
/// is appended as a new operand.
4309 void setStartIndex(VPValue *StartIndex) {
4310 if (getNumOperands() == 4)
4311 setOperand(3, StartIndex);
4312 else
4313 addOperand(StartIndex);
4314 }
4315
4316 /// Returns true if the recipe only uses the first lane of operand \p Op.
4317 bool usesFirstLaneOnly(const VPValue *Op) const override {
4319 "Op must be an operand of the recipe");
4320 return true;
4321 }
4322
4323 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4324
4325protected:
4326#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4327 /// Print the recipe.
4328 void printRecipe(raw_ostream &O, const Twine &Indent,
4329 VPSlotTracker &SlotTracker) const override;
4330#endif
4331};
4332
4333/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4334/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4335/// VPIRMetadata).
4336namespace vpdetail {
4337template <typename VPMixin, typename... RecipeTys>
4339 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4340 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4341 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4342 "Each type in RecipeTys must derive from VPMixin");
4343
4344 /// Used by isa.
4345 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4346
4347 /// Used by cast.
4348 static VPMixin *doCast(VPRecipeBase *R) {
4349 VPMixin *Out = nullptr;
4350 ((Out = dyn_cast<RecipeTys>(R)) || ...);
4351 assert(Out && "Illegal recipe for cast");
4352 return Out;
4353 }
4354 static VPMixin *castFailed() { return nullptr; }
4355};
4356} // namespace vpdetail
4357
4358/// Support casting from VPRecipeBase -> VPPhiAccessors.
4359template <>
4363
4364template <>
4369template <>
4371 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4372 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4373
4374/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4375template <>
4380template <>
4385
4386/// Support casting from VPRecipeBase -> VPIRMetadata.
4387template <>
4393
4394template <>
4399template <>
4401 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4402 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4403
4404/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4405/// holds a sequence of zero or more VPRecipes, each representing a sequence of
4406/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4407class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4408 friend class VPlan;
4409
4410 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4411 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4412 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4413 if (Recipe)
4414 appendRecipe(Recipe);
4415 }
4416
4417public:
4419
4420protected:
4421 /// The VPRecipes held in the order of output instructions to generate.
4423
4424 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4425 : VPBlockBase(BlockSC, Name.str()) {}
4426
4427public:
/// Empty the recipe list by removing recipes one at a time from the back.
/// NOTE(review): presumably back-to-front so that recipes are removed before
/// the recipes defining their operands -- confirm.
4428 ~VPBasicBlock() override {
4429 while (!Recipes.empty())
4430 Recipes.pop_back();
4431 }
4432
4433 /// Instruction iterators...
4438
4439 //===--------------------------------------------------------------------===//
4440 /// Recipe iterator methods
4441 ///
4442 inline iterator begin() { return Recipes.begin(); }
4443 inline const_iterator begin() const { return Recipes.begin(); }
4444 inline iterator end() { return Recipes.end(); }
4445 inline const_iterator end() const { return Recipes.end(); }
4446
4447 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4448 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4449 inline reverse_iterator rend() { return Recipes.rend(); }
4450 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4451
4452 inline size_t size() const { return Recipes.size(); }
4453 inline bool empty() const { return Recipes.empty(); }
4454 inline const VPRecipeBase &front() const { return Recipes.front(); }
4455 inline VPRecipeBase &front() { return Recipes.front(); }
4456 inline const VPRecipeBase &back() const { return Recipes.back(); }
4457 inline VPRecipeBase &back() { return Recipes.back(); }
4458
4459 /// Returns a reference to the list of recipes.
4461
4462 /// Returns a pointer to a member of the recipe list.
4463 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4464 return &VPBasicBlock::Recipes;
4465 }
4466
4467 /// Method to support type inquiry through isa, cast, and dyn_cast. Blocks
/// with the VPIRBasicBlockSC ID are accepted as well as those with the
/// VPBasicBlockSC ID.
4468 static inline bool classof(const VPBlockBase *V) {
4469 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4470 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4471 }
4472
/// Insert \p Recipe into this block's recipe list at \p InsertPt and make
/// this block its parent. \p Recipe must be non-null and must not already
/// have a parent.
4473 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4474 assert(Recipe && "No recipe to append.");
4475 assert(!Recipe->Parent && "Recipe already in VPlan");
4476 Recipe->Parent = this;
4477 Recipes.insert(InsertPt, Recipe);
4478 }
4479
4480 /// Augment the existing recipes of a VPBasicBlock with an additional
4481 /// \p Recipe as the last recipe.
4482 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4483
4484 /// The method which generates the output IR instructions that correspond to
4485 /// this VPBasicBlock, thereby "executing" the VPlan.
4486 void execute(VPTransformState *State) override;
4487
4488 /// Return the cost of this VPBasicBlock.
4489 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4490
4491 /// Return the position of the first non-phi node recipe in the block.
4492 iterator getFirstNonPhi();
4493
4494 /// Returns an iterator range over the PHI-like recipes in the block.
4498
4499 /// Split current block at \p SplitAt by inserting a new block between the
4500 /// current block and its successors and moving all recipes starting at
4501 /// SplitAt to the new block. Returns the new block.
4502 VPBasicBlock *splitAt(iterator SplitAt);
4503
4504 VPRegionBlock *getEnclosingLoopRegion();
4505 const VPRegionBlock *getEnclosingLoopRegion() const;
4506
4507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4508 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4509 /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
4510 ///
4511 /// Note that the numbering is applied to the whole VPlan, so printing
4512 /// individual blocks is consistent with the whole VPlan printing.
4513 void print(raw_ostream &O, const Twine &Indent,
4514 VPSlotTracker &SlotTracker) const override;
4515 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4516#endif
4517
4518 /// If the block has multiple successors, return the branch recipe terminating
4519 /// the block. If there is no successor or only a single one, return nullptr.
4520 VPRecipeBase *getTerminator();
4521 const VPRecipeBase *getTerminator() const;
4522
4523 /// Returns true if the block is exiting its parent region.
4524 bool isExiting() const;
4525
4526 /// Clone the current block and its recipes, without updating the operands of
4527 /// the cloned recipes.
4528 VPBasicBlock *clone() override;
4529
4530 /// Returns the predecessor block at index \p Idx with the predecessors as per
4531 /// the corresponding plain CFG. If the block is an entry block to a region,
4532 /// the first predecessor is the single predecessor of a region, and the
4533 /// second predecessor is the exiting block of the region.
4534 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4535
4536protected:
4537 /// Execute the recipes in the IR basic block \p BB.
4538 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4539
4540 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4541 /// generated for this VPBB.
4542 void connectToPredecessors(VPTransformState &State);
4543
4544private:
4545 /// Create an IR BasicBlock to hold the output instructions generated by this
4546 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4547 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4548};
4549
4550inline const VPBasicBlock *
4552 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4553}
4554
4555/// A special type of VPBasicBlock that wraps an existing IR basic block.
4556/// Recipes of the block get added before the first non-phi instruction in the
4557/// wrapped block.
4558/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4559/// preheader block.
4560class VPIRBasicBlock : public VPBasicBlock {
4561 friend class VPlan;
4562
/// The IR basic block wrapped by this VPIRBasicBlock.
4563 BasicBlock *IRBB;
4564
4565 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
/// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4566 VPIRBasicBlock(BasicBlock *IRBB)
4567 : VPBasicBlock(VPIRBasicBlockSC,
4568 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4569 IRBB(IRBB) {}
4570
4571public:
4572 ~VPIRBasicBlock() override = default;
4573
/// Method to support type inquiry through isa, cast, and dyn_cast.
4574 static inline bool classof(const VPBlockBase *V) {
4575 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4576 }
4577
4578 /// The method which generates the output IR instructions that correspond to
4579 /// this VPBasicBlock, thereby "executing" the VPlan.
4580 void execute(VPTransformState *State) override;
4581
/// Override of clone() with a covariant VPIRBasicBlock return type.
4582 VPIRBasicBlock *clone() override;
4583
/// Return the wrapped IR basic block.
4584 BasicBlock *getIRBasicBlock() const { return IRBB; }
4585};
4586
4587/// Track information about the canonical IV value of a region.
4588/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4590 /// VPRegionValue for the canonical IV, whose allocation is managed by
4591 /// VPCanonicalIVInfo.
4592 std::unique_ptr<VPRegionValue> CanIV;
4593
4594 /// Whether the NUW (no unsigned wrap) flag is set for the increment of the
/// canonical IV. Set by default; cleared via clearNUW().
4595 bool HasNUW = true;
4596
4597public:
4599 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4600
4601 VPRegionValue *getRegionValue() { return CanIV.get(); }
4602 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4603
4604 bool hasNUW() const { return HasNUW; }
4605
4606 void clearNUW() { HasNUW = false; }
4607};
4608
4609/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4610/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4611/// A VPRegionBlock may indicate that its contents are to be replicated several
4612/// times. This is designed to support predicated scalarization, in which a
4613/// scalar if-then code structure needs to be generated VF * UF times. Having
4614/// this replication indicator helps to keep a single model for multiple
4615/// candidate VF's. The actual replication takes place only once the desired VF
4616/// and UF have been determined.
4617class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4618 friend class VPlan;
4619
4620 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4621 VPBlockBase *Entry;
4622
4623 /// Hold the Single Exiting block of the SESE region modelled by the
4624 /// VPRegionBlock.
4625 VPBlockBase *Exiting;
4626
4627 /// Holds the Canonical IV of the loop region along with additional
4628 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4629 /// Loop regions retain their canonical IVs until they are dissolved, even if
4630 /// the canonical IV has no users.
4631 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4632
4633 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4634 /// VPRegionBlocks.
/// This constructor does not create canonical IV info, so a region built
/// through it alone reports isReplicator() == true. If \p Entry is non-null,
/// \p Exiting must be non-null too, \p Entry must have no predecessors and
/// \p Exiting no successors; both blocks then get this region as parent.
4635 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4636 const std::string &Name = "")
4637 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4638 if (Entry) {
4639 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4640 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4641 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4642 Entry->setParent(this);
4643 Exiting->setParent(this);
4644 }
4645 }
4646
/// Construct a region that additionally owns canonical IV info created from
/// \p CanIVTy and \p DL, so that isReplicator() returns false for it.
/// \p Entry, \p Exiting and \p Name are handled by the delegated constructor.
4647 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4648 VPBlockBase *Exiting, const std::string &Name = "")
4649 : VPRegionBlock(Entry, Exiting, Name) {
4650 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4651 }
4652
4653public:
4654 ~VPRegionBlock() override = default;
4655
4656 /// Method to support type inquiry through isa, cast, and dyn_cast.
4657 static inline bool classof(const VPBlockBase *V) {
4658 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4659 }
4660
4661 const VPBlockBase *getEntry() const { return Entry; }
4662 VPBlockBase *getEntry() { return Entry; }
4663
4664 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4665 /// EntryBlock must have no predecessors.
4666 void setEntry(VPBlockBase *EntryBlock) {
4667 assert(!EntryBlock->hasPredecessors() &&
4668 "Entry block cannot have predecessors.");
4669 Entry = EntryBlock;
4670 EntryBlock->setParent(this);
4671 }
4672
4673 const VPBlockBase *getExiting() const { return Exiting; }
4674 VPBlockBase *getExiting() { return Exiting; }
4675
4676 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4677 /// ExitingBlock must have no successors.
4678 void setExiting(VPBlockBase *ExitingBlock) {
4679 assert(!ExitingBlock->hasSuccessors() &&
4680 "Exit block cannot have successors.");
4681 Exiting = ExitingBlock;
4682 ExitingBlock->setParent(this);
4683 }
4684
4685 /// Returns the pre-header VPBasicBlock of the loop region.
4687 assert(!isReplicator() && "should only get pre-header of loop regions");
4688 return getSinglePredecessor()->getExitingBasicBlock();
4689 }
4690
4691 /// An indicator whether this region is to generate multiple replicated
4692 /// instances of output IR corresponding to its VPBlockBases.
4693 bool isReplicator() const { return !CanIVInfo; }
4694
4695 /// The method which generates the output IR instructions that correspond to
4696 /// this VPRegionBlock, thereby "executing" the VPlan.
4697 void execute(VPTransformState *State) override;
4698
4699 /// Return the cost of this region.
4700 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4701
4702#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4703 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4704 /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
4705 /// consecutive numbers.
4706 ///
4707 /// Note that the numbering is applied to the whole VPlan, so printing
4708 /// individual regions is consistent with the whole VPlan printing.
4709 void print(raw_ostream &O, const Twine &Indent,
4710 VPSlotTracker &SlotTracker) const override;
4711 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
4712#endif
4713
4714 /// Clone all blocks in the single-entry single-exit region of the block and
4715 /// their recipes without updating the operands of the cloned recipes.
4716 VPRegionBlock *clone() override;
4717
4718 /// Remove the current region from its VPlan, connecting its predecessor to
4719 /// its entry, and its exiting block to its successor.
4720 void dissolveToCFGLoop();
4721
4722 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4723 /// a new increment before the terminator and return it. The canonical IV
4724 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4725 VPInstruction *getOrCreateCanonicalIVIncrement();
4726
4727 /// Return the canonical induction variable of the region, null for
4728 /// replicating regions.
4730 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4731 }
4733 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4734 }
4735
4736 /// Return the type of the canonical IV for loop regions.
4738 return CanIVInfo->getRegionValue()->getType();
4739 }
4740
4741 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
/// Must not be called on replicating regions: they have no canonical IV info
/// (CanIVInfo is null), which the assert below checks in asserts-enabled
/// builds instead of dereferencing a null pointer.
4742 bool hasCanonicalIVNUW() const {
assert(CanIVInfo && "only loop regions track NUW for the canonical IV");
return CanIVInfo->hasNUW();
}
4743
4744 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4746 assert(Increment && "Must provide increment to clear");
4747 Increment->dropPoisonGeneratingFlags();
4748 CanIVInfo->clearNUW();
4749 }
4750};
4751
4753 return getParent()->getParent();
4754}
4755
4757 return getParent()->getParent();
4758}
4759
4760/// VPlan models a candidate for vectorization, encoding various decisions taken
4761/// to produce efficient output IR, including which branches, basic-blocks and
4762/// output IR instructions to generate, and their cost. VPlan holds a
4763/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4764/// VPBasicBlock.
4765class VPlan {
4766 friend class VPlanPrinter;
4767 friend class VPSlotTracker;
4768
4769 /// VPBasicBlock corresponding to the original preheader. Used to place
4770 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4771 /// rest of VPlan execution.
4772 /// When this VPlan is used for the epilogue vector loop, the entry will be
4773 /// replaced by a new entry block created during skeleton creation.
4774 VPBasicBlock *Entry;
4775
4776 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4777 VPIRBasicBlock *ScalarHeader;
4778
4779 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4780 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4781 /// e.g. if the scalar epilogue always executes.
4783
4784 /// Holds the VFs applicable to this VPlan.
4786
4787 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4788 /// any UF.
4790
4791 /// Holds the name of the VPlan, for printing.
4792 std::string Name;
4793
4794 /// Represents the trip count of the original loop, for folding
4795 /// the tail.
4796 VPValue *TripCount = nullptr;
4797
4798 /// Represents the backedge taken count of the original loop, for folding
4799 /// the tail. It equals TripCount - 1.
4800 VPSymbolicValue *BackedgeTakenCount = nullptr;
4801
4802 /// Represents the vector trip count.
4803 VPSymbolicValue VectorTripCount;
4804
4805 /// Represents the vectorization factor of the loop.
4806 VPSymbolicValue VF;
4807
4808 /// Represents the unroll factor of the loop.
4809 VPSymbolicValue UF;
4810
4811 /// Represents the loop-invariant VF * UF of the vector loop region.
4812 VPSymbolicValue VFxUF;
4813
4814 /// Contains all the external definitions created for this VPlan, as a mapping
4815 /// from IR Values to VPIRValues.
4817
4818 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4819 /// VPlan is destroyed.
4820 SmallVector<VPBlockBase *> CreatedBlocks;
4821
4822 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4823 /// wrapping the original header of the scalar loop. The vector loop will have
4824 /// index type \p IdxTy.
4825 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4826 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4827 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4828 Entry->setPlan(this);
4829 assert(ScalarHeader->getNumSuccessors() == 0 &&
4830 "scalar header must be a leaf node");
4831 }
4832
4833public:
4834 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4835 /// original preheader and scalar header of \p L, to be used as entry and
4836 /// scalar header blocks of the new VPlan. The vector loop will have index
4837 /// type \p IdxTy.
4838 VPlan(Loop *L, Type *IdxTy);
4839
4840 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4841 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4842 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4843 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4844 setEntry(createVPBasicBlock("preheader"));
4845 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4846 }
4847
4849
4851 Entry = VPBB;
4852 VPBB->setPlan(this);
4853 }
4854
4855 /// Generate the IR code for this VPlan.
4856 void execute(VPTransformState *State);
4857
4858 /// Return the cost of this plan.
4860
4861 VPBasicBlock *getEntry() { return Entry; }
4862 const VPBasicBlock *getEntry() const { return Entry; }
4863
4864 /// Returns the preheader of the vector loop region, if one exists, or null
4865 /// otherwise.
4867 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4868 return VectorRegion
4869 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4870 : nullptr;
4871 }
4872
4873 /// Returns the VPRegionBlock of the vector loop.
4876
4877 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4878 /// loop region contains a nested loop region.
4879 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4880
4881 /// Returns the 'middle' block of the plan, that is the block that selects
4882 /// whether to execute the scalar tail loop or the exit block from the loop
4883 /// latch. If there is an early exit from the vector loop, the middle block
4884 /// conceptually has the early exit block as third successor, split across 2
4885 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4886 /// tail loop or the exit block. If the scalar tail loop or exit block are
4887 /// known to always execute, the middle block may branch directly to that
4888 /// block. This function cannot be called once the vector loop region has been
4889 /// removed.
4891 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4892 assert(
4893 LoopRegion &&
4894 "cannot call the function after vector loop region has been removed");
4895 // The middle block is always the last successor of the region.
4896 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4897 }
4898
4900 return const_cast<VPlan *>(this)->getMiddleBlock();
4901 }
4902
4903 /// Return the VPBasicBlock for the preheader of the scalar loop.
4906 getScalarHeader()->getSinglePredecessor());
4907 }
4908
4909 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4910 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4911
4912 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4913 /// the original scalar loop.
4914 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4915
4916 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4917 /// exit block.
4919
4920 /// Returns true if \p VPBB is an exit block.
4921 bool isExitBlock(VPBlockBase *VPBB);
4922
4923 /// The trip count of the original loop.
4925 assert(TripCount && "trip count needs to be set before accessing it");
4926 return TripCount;
4927 }
4928
4929 /// Set the trip count assuming it is currently null; if it is not - use
4930 /// resetTripCount().
4931 void setTripCount(VPValue *NewTripCount) {
4932 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4933 TripCount = NewTripCount;
4934 }
4935
4936 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4937 /// the original trip count have been replaced.
4938 void resetTripCount(VPValue *NewTripCount) {
4939 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4940 "TripCount must be set when resetting");
4941 TripCount = NewTripCount;
4942 }
4943
4944 /// The backedge taken count of the original loop.
4946 // BTC shares the canonical IV type with VectorTripCount.
4947 if (!BackedgeTakenCount)
4948 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4949 return BackedgeTakenCount;
4950 }
4951 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4952
4953 /// The vector trip count.
4954 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4955
4956 /// Returns the VF of the vector loop region.
4957 VPSymbolicValue &getVF() { return VF; };
4958 const VPSymbolicValue &getVF() const { return VF; };
4959
4960 /// Returns the UF of the vector loop region.
4961 VPSymbolicValue &getUF() { return UF; };
4962
4963 /// Returns VF * UF of the vector loop region.
4964 VPSymbolicValue &getVFxUF() { return VFxUF; }
4965
4968 }
4969
4970 const DataLayout &getDataLayout() const {
4972 }
4973
4974 void addVF(ElementCount VF) { VFs.insert(VF); }
4975
4977 assert(hasVF(VF) && "Cannot set VF not already in plan");
4978 VFs.clear();
4979 VFs.insert(VF);
4980 }
4981
4982 /// Remove \p VF from the plan.
4984 assert(hasVF(VF) && "tried to remove VF not present in plan");
4985 VFs.remove(VF);
4986 }
4987
4988 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4989 bool hasScalableVF() const {
4990 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4991 }
4992
4993 /// Returns an iterator range over all VFs of the plan.
4996 return VFs;
4997 }
4998
4999 /// Returns the single VF of the plan, asserting that the plan has exactly
5000 /// one VF.
5002 assert(VFs.size() == 1 && "expected plan with single VF");
5003 return VFs[0];
5004 }
5005
5006 bool hasScalarVFOnly() const {
5007 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
5008 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
5009 "Plan with scalar VF should only have a single VF");
5010 return HasScalarVFOnly;
5011 }
5012
5013 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
5014
5015 /// Returns the concrete UF of the plan, after unrolling.
5016 unsigned getConcreteUF() const {
5017 assert(UFs.size() == 1 && "Expected a single UF");
5018 return UFs[0];
5019 }
5020
5021 void setUF(unsigned UF) {
5022 assert(hasUF(UF) && "Cannot set the UF not already in plan");
5023 UFs.clear();
5024 UFs.insert(UF);
5025 }
5026
5027 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
5028 /// concrete UF.
5029 bool isUnrolled() const { return UFs.size() == 1; }
5030
5031 /// Return a string with the name of the plan and the applicable VFs and UFs.
5032 std::string getName() const;
5033
5034 void setName(const Twine &newName) { Name = newName.str(); }
5035
5036 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
5037 /// yet) for \p V.
5039 assert(V && "Trying to get or add the VPIRValue of a null Value");
5040 auto [It, Inserted] = LiveIns.try_emplace(V);
5041 if (Inserted) {
5042 if (auto *CI = dyn_cast<ConstantInt>(V))
5043 It->second = new VPConstantInt(CI);
5044 else
5045 It->second = new VPIRValue(V);
5046 }
5047
5048 assert(isa<VPIRValue>(It->second) &&
5049 "Only VPIRValues should be in mapping");
5050 return It->second;
5051 }
5053 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
5054 return getOrAddLiveIn(V->getValue());
5055 }
5056
5057 /// Return a VPIRValue wrapping i1 true.
5058 VPIRValue *getTrue() { return getConstantInt(1, 1); }
5059
5060 /// Return a VPIRValue wrapping i1 false.
5061 VPIRValue *getFalse() { return getConstantInt(1, 0); }
5062
5063 /// Return a VPIRValue wrapping the null value of type \p Ty.
5064 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
5065
5066 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
5068 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
5069 }
5070
5071 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
5072 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
5073 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
5074 }
5075
5076 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
5077 /// value.
5079 bool IsSigned = false) {
5080 return getConstantInt(APInt(BitWidth, Val, IsSigned));
5081 }
5082
5083 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
5085 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
5086 }
5087
5088 /// Return a VPIRValue wrapping a poison value of type \p Ty.
5090 return getOrAddLiveIn(PoisonValue::get(Ty));
5091 }
5092
5093 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
5094 /// otherwise.
5095 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
5096
5097 /// Return the list of live-in VPValues available in the VPlan.
5098 auto getLiveIns() const { return LiveIns.values(); }
5099
5100#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5101 /// Print the live-ins of this VPlan to \p O.
5102 void printLiveIns(raw_ostream &O) const;
5103
5104 /// Print this VPlan to \p O.
5105 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
5106
5107 /// Print this VPlan in DOT format to \p O.
5108 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
5109
5110 /// Dump the plan to stderr (for debugging).
5111 LLVM_DUMP_METHOD void dump() const;
5112#endif
5113
5114 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
5115 /// recipes to refer to the clones, and return it.
5117
5118 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
5119 /// present. The returned block is owned by the VPlan and deleted once the
5120 /// VPlan is destroyed.
5122 VPRecipeBase *Recipe = nullptr) {
5123 auto *VPB = new VPBasicBlock(Name, Recipe);
5124 CreatedBlocks.push_back(VPB);
5125 return VPB;
5126 }
5127
5128 /// Create a new loop region with a canonical IV using \p CanIVTy and
5129 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
5130 /// to \p Entry and \p Exiting respectively, if provided. The returned block
5131 /// is owned by the VPlan and deleted once the VPlan is destroyed.
5133 const std::string &Name = "",
5134 VPBlockBase *Entry = nullptr,
5135 VPBlockBase *Exiting = nullptr) {
5136 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
5137 CreatedBlocks.push_back(VPB);
5138 return VPB;
5139 }
5140
5141 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
5142 /// returned block is owned by the VPlan and deleted once the VPlan is
5143 /// destroyed.
5145 const std::string &Name = "") {
5146 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
5147 CreatedBlocks.push_back(VPB);
5148 return VPB;
5149 }
5150
5151 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
5152 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
5153 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
5155
5156 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
5157 /// instructions in \p IRBB, except its terminator which is managed by the
5158 /// successors of the block in VPlan. The returned block is owned by the VPlan
5159 /// and deleted once the VPlan is destroyed.
5161
5162 /// Returns true if the VPlan is based on a loop with an early exit. That is
5163 /// the case if the VPlan has either more than one exit block or a single exit
5164 /// block with multiple predecessors (one for the exit via the latch and one
5165 /// via the other early exit).
5166 bool hasEarlyExit() const {
5167 return count_if(ExitBlocks,
5168 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
5169 1 ||
5170 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
5171 }
5172
5173 /// Returns true if the scalar tail may execute after the vector loop, i.e.
5174 /// if the middle block is a predecessor of the scalar preheader. Note that
5175 /// this relies on unneeded branches to the scalar tail loop being removed.
5176 bool hasScalarTail() const {
5177 auto *ScalarPH = getScalarPreheader();
5178 return ScalarPH &&
5179 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
5180 }
5181
5182 /// The type of the canonical induction variable of the vector loop.
5183 Type *getIndexType() const { return VF.getType(); }
5184};
5185
5186#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5187inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
5188 Plan.print(OS);
5189 return OS;
5190}
5191#endif
5192
5193} // end namespace llvm
5194
5195#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition Compiler.h:215
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:220
#define LLVM_PACKED_START
Definition Compiler.h:556
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
static Interval intersect(const Interval &I1, const Interval &I2)
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:585
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:151
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:151
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1069
Root of the metadata hierarchy.
Definition Metadata.h:64
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an assumption made using SCEV expressions which can be checked at run-time.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4063
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:4057
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4407
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4435
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4482
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4437
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4434
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4460
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4418
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4424
iterator end()
Definition VPlan.h:4444
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4442
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4436
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4495
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:763
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
~VPBasicBlock() override
Definition VPlan.h:4428
const_reverse_iterator rbegin() const
Definition VPlan.h:4448
reverse_iterator rend()
Definition VPlan.h:4449
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4422
VPRecipeBase & back()
Definition VPlan.h:4457
const VPRecipeBase & front() const
Definition VPlan.h:4454
const_iterator begin() const
Definition VPlan.h:4443
VPRecipeBase & front()
Definition VPlan.h:4455
const VPRecipeBase & back() const
Definition VPlan.h:4456
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4473
bool empty() const
Definition VPlan.h:4453
const_iterator end() const
Definition VPlan.h:4445
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4468
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4463
reverse_iterator rbegin()
Definition VPlan.h:4447
friend class VPlan
Definition VPlan.h:4408
size_t size() const
Definition VPlan.h:4452
const_reverse_iterator rend() const
Definition VPlan.h:4450
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:3002
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:3007
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2963
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2997
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3019
VPBlendRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:2984
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2982
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:3013
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2993
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:315
VPRegionBlock * getParent()
Definition VPlan.h:186
VPBlocksTy & getPredecessors()
Definition VPlan.h:223
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:220
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:385
void setName(const Twine &newName)
Definition VPlan.h:179
size_t getNumSuccessors() const
Definition VPlan.h:237
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:219
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:217
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:337
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:173
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:273
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:350
size_t getNumPredecessors() const
Definition VPlan.h:238
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:306
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:258
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:343
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:215
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:171
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:230
const VPRegionBlock * getParent() const
Definition VPlan.h:187
const std::string & getName() const
Definition VPlan.h:177
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:325
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:263
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:297
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:233
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:257
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:322
friend class VPBlockUtils
Definition VPlan.h:95
unsigned getVPBlockID() const
Definition VPlan.h:184
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:364
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:329
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:163
VPBlocksTy & getSuccessors()
Definition VPlan.h:212
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:250
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:286
void setParent(VPRegionBlock *P)
Definition VPlan.h:197
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:279
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:227
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:211
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3523
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3507
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3531
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3504
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4601
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4598
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4602
bool hasNUW() const
Definition VPlan.h:4604
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4095
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:4089
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:4107
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:4101
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4114
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:4231
VPValue * getIndex() const
Definition VPlan.h:4228
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:4230
VPIRValue * getStartValue() const
Definition VPlan.h:4227
VPValue * getStepValue() const
Definition VPlan.h:4229
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4219
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4212
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:4203
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4234
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:4196
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4032
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:4037
VPExpandSCEVRecipe(const SCEV *Expr)
const SCEV * getSCEV() const
Definition VPlan.h:4043
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4028
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3682
bool isVectorToScalar() const
Returns true if this VPExpressionRecipe produces a single scalar.
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3664
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3593
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3646
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3634
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3591
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3609
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3613
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3676
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3607
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2436
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2438
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2447
void addBackedgeValue(VPValue *V)
Add V as the incoming value from the loop backedge.
Definition VPlan.h:2491
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2460
static bool classof(const VPValue *V)
Definition VPlan.h:2457
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2483
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2443
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2488
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2472
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2480
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2453
VPValue * getStartValue() const
Definition VPlan.h:2475
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2501
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition VPlan.h:2163
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2176
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2193
unsigned getOpcode() const
Definition VPlan.h:2189
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2168
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4560
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:473
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4584
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4574
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4561
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:498
Class to record and manage LLVM IR flags.
Definition VPlan.h:695
FastMathFlagsTy FMFs
Definition VPlan.h:783
ReductionFlagsTy ReductionFlags
Definition VPlan.h:785
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1038
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:876
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:856
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:842
WrapFlagsTy WrapFlags
Definition VPlan.h:777
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:835
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:1000
bool isReductionOrdered() const
Definition VPlan.h:1064
TruncFlagsTy TruncFlags
Definition VPlan.h:778
CmpInst::Predicate getPredicate() const
Definition VPlan.h:972
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1048
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlagsOrNone() const
uint8_t AllFlags[2]
Definition VPlan.h:786
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1008
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:881
ExactFlagsTy ExactFlags
Definition VPlan.h:780
bool hasNoSignedWrap() const
Definition VPlan.h:1027
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1052
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:847
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:852
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:861
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:830
uint8_t GEPFlagsStorage
Definition VPlan.h:781
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:866
bool isNonNeg() const
Definition VPlan.h:1010
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:990
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:995
DisjointFlagsTy DisjointFlags
Definition VPlan.h:779
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:980
bool hasNoUnsignedWrap() const
Definition VPlan.h:1016
FCmpFlagsTy FCmpFlags
Definition VPlan.h:784
NonNegFlagsTy NonNegFlags
Definition VPlan.h:782
bool isReductionInLoop() const
Definition VPlan.h:1070
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:892
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:929
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:871
uint8_t CmpPredStorage
Definition VPlan.h:776
RecurKind getRecurKind() const
Definition VPlan.h:1058
VPIRFlags(Instruction &I)
Definition VPlan.h:792
Instruction & getInstruction() const
Definition VPlan.h:1752
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1760
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1739
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1766
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1754
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1727
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1171
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1207
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1179
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
VPIRMetadata()=default
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1191
static bool classof(const VPUser *R)
Definition VPlan.h:1570
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1550
Type * getResultType() const
Definition VPlan.h:1588
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1574
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Value *UV=nullptr)
Definition VPlan.h:1541
void execute(VPTransformState &State) override
Generate the instruction.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
bool usesScalars(const VPValue *Op) const override
Cast recipes always use scalars of their operand.
Definition VPlan.h:1591
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1473
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1495
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1404
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1328
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1319
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1332
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1344
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1322
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1269
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1315
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1264
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1261
@ VScale
Returns the value for vscale.
Definition VPlan.h:1348
@ CanonicalIVIncrementForPart
Definition VPlan.h:1245
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1272
bool hasResult() const
Definition VPlan.h:1438
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1498
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1478
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1520
unsigned getOpcode() const
Definition VPlan.h:1417
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1523
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1489
VPInstruction * cloneWithOperands(ArrayRef< VPValue * > NewOperands, Type *ResultTy=nullptr)
Definition VPlan.h:1408
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1463
A common base class for interleaved memory operations.
Definition VPlan.h:3044
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:3107
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3113
static bool classof(const VPUser *U)
Definition VPlan.h:3089
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3056
Instruction * getInsertPos() const
Definition VPlan.h:3111
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3084
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:3109
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3101
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:3130
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3095
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3210
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3204
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3217
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3197
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3184
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:3140
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3167
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3150
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3161
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3142
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
A VPRecipeValue defined by a multi-def recipe, stores a pointer to it.
Definition VPlanValue.h:364
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1607
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1636
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
Definition VPlan.h:1665
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1631
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4551
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1656
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1616
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1641
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1645
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3722
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3733
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3717
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:550
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4752
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenMemIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:420
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:558
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:477
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:526
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:404
const VPBasicBlock * getParent() const
Definition VPlan.h:478
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:531
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:523
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:467
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:337
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3376
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3355
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3379
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3366
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2924
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2915
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2897
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2908
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true if the phi is part of a multi-use reduction.
Definition VPlan.h:2936
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2878
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2927
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2941
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPReductionPHIRecipe * cloneWithOperands(VPValue *Start, VPValue *BackedgeValue)
Definition VPlan.h:2890
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2933
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2921
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3233
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3242
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3318
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3287
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3302
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3329
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3331
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3314
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3267
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3316
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3273
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3320
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3327
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3322
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3281
static bool classof(const VPUser *U)
Definition VPlan.h:3292
static bool classof(const VPValue *VPV)
Definition VPlan.h:3297
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3336
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4617
const VPBlockBase * getEntry() const
Definition VPlan.h:4661
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4693
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4678
VPBlockBase * getExiting()
Definition VPlan.h:4674
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4732
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4666
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4737
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4742
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4745
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4729
const VPBlockBase * getExiting() const
Definition VPlan.h:4673
VPBlockBase * getEntry()
Definition VPlan.h:4662
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4686
friend class VPlan
Definition VPlan.h:4618
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4657
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3398
bool isSingleScalar() const
Definition VPlan.h:3456
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the recipe is predicated.
Definition VPlan.h:3487
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3406
~VPReplicateRecipe() override=default
static Type * computeScalarType(const Instruction *I, ArrayRef< VPValue * > Operands)
Compute the scalar result type for a VPReplicateRecipe wrapping I with Operands (excluding any predic...
VPReplicateRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:3430
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3468
operand_range operandsWithoutMask()
Return the recipe's operands, excluding the mask of a predicated recipe.
Definition VPlan.h:3481
bool isPredicated() const
Definition VPlan.h:3458
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3428
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3461
unsigned getOpcode() const
Definition VPlan.h:3491
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3475
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4323
VPValue * getStepValue() const
Definition VPlan.h:4296
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4266
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4309
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4278
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4304
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4300
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4259
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4317
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:609
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:615
static bool classof(const VPValue *V)
Definition VPlan.h:667
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:680
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:624
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:683
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, Value *UV=nullptr, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:619
static bool classof(const VPUser *U)
Definition VPlan.h:672
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:611
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:169
This class can be used to assign names to VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1520
operand_range operands()
Definition VPlanValue.h:457
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:430
unsigned getNumOperands() const
Definition VPlanValue.h:424
operand_iterator op_end()
Definition VPlanValue.h:455
operand_iterator op_begin()
Definition VPlanValue.h:453
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:425
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:405
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:451
virtual bool usesScalars(const VPValue *Op) const
Returns true if the VPUser uses scalars of operand Op.
Definition VPlanValue.h:464
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:450
void addOperand(VPValue *Operand)
Definition VPlanValue.h:410
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Definition VPlan.cpp:149
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:208
unsigned getNumUsers() const
Definition VPlanValue.h:115
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2306
VPValue * getVFValue() const
Definition VPlan.h:2287
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2284
int64_t getStride() const
Definition VPlan.h:2285
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2327
VPValue * getOffset() const
Definition VPlan.h:2288
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2320
void addOffset(VPValue *Offset)
Append Offset as the offset operand.
Definition VPlan.h:2298
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2274
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2313
VPValue * getPointer() const
Definition VPlan.h:2286
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
void addPerPartOffset(VPValue *VFxPart)
Add the per-part offset (VFxPart) used for unrolled parts > 0.
Definition VPlan.h:2368
VPValue * getStride() const
Definition VPlan.h:2361
Type * getSourceElementType() const
Definition VPlan.h:2376
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2378
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2385
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2352
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2402
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2392
VPValue * getVFxPart() const
Definition VPlan.h:2363
A recipe for widening Call instructions using library calls.
Definition VPlan.h:2097
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:2104
const_operand_range args() const
Definition VPlan.h:2145
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2123
operand_range args()
Definition VPlan.h:2144
Function * getCalledScalarFunction() const
Definition VPlan.h:2140
~VPWidenCallRecipe() override=default
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV, const VPIRFlags::WrapFlagsTy &Flags={false, false})
Definition VPlan.h:4133
~VPWidenCanonicalIVRecipe() override=default
VPValue * getStepValue() const
Definition VPlan.h:4166
void addPerPartStep(VPValue *Step)
Add the per-part step (VF * Part) used for unrolled parts.
Definition VPlan.h:4171
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:4155
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:4162
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4140
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4150
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1878
Instruction::CastOps getOpcode() const
Definition VPlan.h:1914
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1883
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1899
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2236
Type * getSourceElementType() const
Definition VPlan.h:2241
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2244
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2227
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), GetElementPtrInst *UV=nullptr)
Definition VPlan.h:2210
void execute(VPTransformState &State) override=0
Generate the phi nodes.
ArrayRef< const SCEVPredicate * > getNoWrapPredicates() const
Returns the SCEV predicates associated with this induction.
Definition VPlan.h:2590
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2609
static bool classof(const VPValue *V)
Definition VPlan.h:2552
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2571
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2594
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2564
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2579
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2582
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2520
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2567
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2526
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2587
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2601
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2547
const VPValue * getVFValue() const
Definition VPlan.h:2574
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2557
const VPValue * getStepValue() const
Definition VPlan.h:2568
void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart)
After unrolling, append the splat-VF step (VF * step) and the value of the induction at the last unro...
Definition VPlan.h:2535
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2670
const TruncInst * getTruncInst() const
Definition VPlan.h:2686
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2664
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2674
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2656
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2630
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2685
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2639
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2696
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2681
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1925
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1975
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:2029
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:2035
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1961
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:2041
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2011
static bool classof(const VPValue *V)
Definition VPlan.h:2006
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1986
VPWidenIntrinsicRecipe(const unsigned char SC, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1939
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:2038
~VPWidenIntrinsicRecipe() override=default
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1996
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
static bool classof(const VPUser *U)
Definition VPlan.h:2001
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
~VPWidenMemIntrinsicRecipe() override=default
VPWidenMemIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2074
VPWidenMemIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2060
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
A common mixin class for widening memory operations.
Definition VPlan.h:3749
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3760
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3785
virtual ~VPWidenMemoryRecipe()=default
Instruction & Ingredient
Definition VPlan.h:3751
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition VPlan.h:3807
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3757
virtual const VPRecipeBase * getAsRecipe() const =0
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3795
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3754
VPWidenMemoryRecipe(Instruction &I, bool Consecutive, const VPIRMetadata &Metadata)
Definition VPlan.h:3772
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3791
void setMask(VPValue *Mask)
Definition VPlan.h:3762
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3802
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3788
A recipe for widened phis.
Definition VPlan.h:2754
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2796
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2774
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2761
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2723
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2732
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2713
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1817
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1838
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1867
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1821
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1828
~VPWidenRecipe() override=default
VPWidenRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:1840
unsigned getOpcode() const
Definition VPlan.h:1857
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4765
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:5095
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1170
friend class VPSlotTracker
Definition VPlan.h:4767
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1146
bool hasVF(ElementCount VF) const
Definition VPlan.h:4988
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:5001
const DataLayout & getDataLayout() const
Definition VPlan.h:4970
LLVMContext & getContext() const
Definition VPlan.h:4966
VPBasicBlock * getEntry()
Definition VPlan.h:4861
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:5183
void setName(const Twine &newName)
Definition VPlan.h:5034
bool hasScalableVF() const
Definition VPlan.h:4989
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4924
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4945
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4995
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:902
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:885
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:5052
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:910
const VPBasicBlock * getEntry() const
Definition VPlan.h:4862
friend class VPlanPrinter
Definition VPlan.h:4766
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:5061
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:5084
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4964
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:5067
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:5144
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1305
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:5098
bool hasUF(unsigned UF) const
Definition VPlan.h:5013
VPIRValue * getPoison(Type *Ty)
Return a VPIRValue wrapping a poison value of type Ty.
Definition VPlan.h:5089
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4914
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4842
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4954
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4951
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:5038
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:5132
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:5064
void setVF(ElementCount VF)
Definition VPlan.h:4976
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:5029
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1061
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:5166
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1043
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1076
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:5016
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:5078
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4899
void setTripCount(VPValue *NewTripCount)
Set the trip count, assuming it is currently null; if it is not, use resetTripCount() instead.
Definition VPlan.h:4931
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4938
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4890
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4850
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:5121
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1311
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4983
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:5058
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4866
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1176
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4961
bool hasScalarVFOnly() const
Definition VPlan.h:5006
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4904
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:920
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1129
void addVF(ElementCount VF)
Definition VPlan.h:4974
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4910
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1085
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4957
void setUF(unsigned UF)
Definition VPlan.h:5021
const VPSymbolicValue & getVF() const
Definition VPlan.h:4958
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:5176
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1217
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5072
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
CastInfo helper for casting from VPRecipeBase to a mixin class that is not part of the VPRecipeBase c...
Definition VPlan.h:4336
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1113
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2851
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
Type * toScalarizedTy(Type *Ty)
A helper for converting vectorized types to scalarized (non-vector) types.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:89
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isPointerTy(const Type *T)
Definition SPIRVUtils.h:377
LLVM_ABI Type * computeScalarTypeForInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands)
Compute the scalar result type for an IR Opcode given Operands.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2849
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:74
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:860
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2843
Possible variants of a reduction.
Definition VPlan.h:2841
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2846
unsigned VFScaleFactor
Definition VPlan.h:2847
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:264
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2812
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2824
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2803
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:727
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:732
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:722
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:715
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1785
PHINode & getIRPhi()
Definition VPlan.h:1798
VPIRPhi(PHINode &PN)
Definition VPlan.h:1786
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1788
static bool classof(const VPUser *U)
Definition VPlan.h:1793
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1809
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
static bool classof(const VPUser *U)
Definition VPlan.h:1685
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1700
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1715
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1695
static bool classof(const VPValue *V)
Definition VPlan.h:1690
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="", Type *ResultTy=nullptr)
Definition VPlan.h:1680
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1117
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1158
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1129
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1118
static bool classof(const VPValue *V)
Definition VPlan.h:1151
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1123
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1146
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3863
VPWidenLoadEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3873
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3880
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3864
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3890
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3813
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3814
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3839
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3822
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadRecipe.
Definition VPlan.h:3833
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3966
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3982
VPWidenStoreEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3975
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3967
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3995
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3985
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3912
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3913
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3930
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3921
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreRecipe.
Definition VPlan.h:3936
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3942
static VPMixin * castFailed()
Definition VPlan.h:4354
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4345
static VPMixin * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4348