LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class Value;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77using VPlanPtr = std::unique_ptr<VPlan>;
78
79/// \enum UncountableExitStyle
80/// Different methods of handling early exits.
81///
84 /// No side effects to worry about, so we can process any uncountable exits
85 /// in the loop and branch either to the middle block if the trip count was
 86 /// reached, or an early exit block to determine which exit was taken.
88 /// All memory operations other than the load(s) required to determine whether
 89 /// an uncountable exit occurred will be masked based on that condition. If an
90 /// uncountable exit is taken, then all lanes before the exiting lane will
91 /// complete, leaving just the final lane to execute in the scalar tail.
93};
94
95/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
96/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
98 friend class VPBlockUtils;
99
100 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
101
102 /// An optional name for the block.
103 std::string Name;
104
105 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
106 /// it is a topmost VPBlockBase.
107 VPRegionBlock *Parent = nullptr;
108
109 /// List of predecessor blocks.
111
112 /// List of successor blocks.
114
115 /// VPlan containing the block. Can only be set on the entry block of the
116 /// plan.
117 VPlan *Plan = nullptr;
118
 119 /// Add \p Successor as the last successor to this block.
 /// Only this block's successor list is modified; \p Successor's predecessor
 /// list is left untouched, so the caller is responsible for keeping both
 /// directions of the CFG edge in sync.
 120 void appendSuccessor(VPBlockBase *Successor) {
 121 assert(Successor && "Cannot add nullptr successor!");
 122 Successors.push_back(Successor);
 123 }
124
 125 /// Add \p Predecessor as the last predecessor to this block.
 /// Only this block's predecessor list is modified; \p Predecessor's
 /// successor list is left untouched, so the caller is responsible for
 /// keeping both directions of the CFG edge in sync.
 126 void appendPredecessor(VPBlockBase *Predecessor) {
 127 assert(Predecessor && "Cannot add nullptr predecessor!");
 128 Predecessors.push_back(Predecessor);
 129 }
130
131 /// Remove \p Predecessor from the predecessors of this block.
132 void removePredecessor(VPBlockBase *Predecessor) {
133 auto Pos = find(Predecessors, Predecessor);
134 assert(Pos && "Predecessor does not exist");
135 Predecessors.erase(Pos);
136 }
137
138 /// Remove \p Successor from the successors of this block.
139 void removeSuccessor(VPBlockBase *Successor) {
140 auto Pos = find(Successors, Successor);
141 assert(Pos && "Successor does not exist");
142 Successors.erase(Pos);
143 }
144
145 /// This function replaces one predecessor with another, useful when
146 /// trying to replace an old block in the CFG with a new one.
147 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
148 auto I = find(Predecessors, Old);
149 assert(I != Predecessors.end());
150 assert(Old->getParent() == New->getParent() &&
151 "replaced predecessor must have the same parent");
152 *I = New;
153 }
154
155 /// This function replaces one successor with another, useful when
156 /// trying to replace an old block in the CFG with a new one.
157 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
158 auto I = find(Successors, Old);
159 assert(I != Successors.end());
160 assert(Old->getParent() == New->getParent() &&
161 "replaced successor must have the same parent");
162 *I = New;
163 }
164
165protected:
166 VPBlockBase(const unsigned char SC, const std::string &N)
167 : SubclassID(SC), Name(N) {}
168
169public:
170 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
171 /// that are actually instantiated. Values of this enumeration are kept in the
172 /// SubclassID field of the VPBlockBase objects. They are used for concrete
173 /// type identification.
174 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
175
177
178 virtual ~VPBlockBase() = default;
179
180 const std::string &getName() const { return Name; }
181
182 void setName(const Twine &newName) { Name = newName.str(); }
183
184 /// \return an ID for the concrete type of this object.
185 /// This is used to implement the classof checks. This should not be used
186 /// for any other purpose, as the values may change as LLVM evolves.
187 unsigned getVPBlockID() const { return SubclassID; }
188
189 VPRegionBlock *getParent() { return Parent; }
190 const VPRegionBlock *getParent() const { return Parent; }
191
192 /// \return A pointer to the plan containing the current block.
193 VPlan *getPlan();
194 const VPlan *getPlan() const;
195
196 /// Sets the pointer of the plan containing the block. The block must be the
197 /// entry block into the VPlan.
198 void setPlan(VPlan *ParentPlan);
199
200 void setParent(VPRegionBlock *P) { Parent = P; }
201
202 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
203 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
204 /// VPBlockBase is a VPBasicBlock, it is returned.
205 const VPBasicBlock *getEntryBasicBlock() const;
206 VPBasicBlock *getEntryBasicBlock();
207
208 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
209 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
210 /// VPBlockBase is a VPBasicBlock, it is returned.
211 const VPBasicBlock *getExitingBasicBlock() const;
212 VPBasicBlock *getExitingBasicBlock();
213
214 const VPBlocksTy &getSuccessors() const { return Successors; }
215 VPBlocksTy &getSuccessors() { return Successors; }
216
217 /// Returns true if this block has any successors.
218 bool hasSuccessors() const { return !Successors.empty(); }
219 /// Returns true if this block has any predecessors.
220 bool hasPredecessors() const { return !Predecessors.empty(); }
221
224
225 const VPBlocksTy &getPredecessors() const { return Predecessors; }
226 VPBlocksTy &getPredecessors() { return Predecessors; }
227
228 /// \return the successor of this VPBlockBase if it has a single successor.
229 /// Otherwise return a null pointer.
231 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
232 }
233
234 /// \return the predecessor of this VPBlockBase if it has a single
235 /// predecessor. Otherwise return a null pointer.
237 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
238 }
239
240 size_t getNumSuccessors() const { return Successors.size(); }
241 size_t getNumPredecessors() const { return Predecessors.size(); }
242
243 /// An Enclosing Block of a block B is any block containing B, including B
244 /// itself. \return the closest enclosing block starting from "this", which
245 /// has successors. \return the root enclosing block if all enclosing blocks
246 /// have no successors.
247 VPBlockBase *getEnclosingBlockWithSuccessors();
248
249 /// \return the closest enclosing block starting from "this", which has
250 /// predecessors. \return the root enclosing block if all enclosing blocks
251 /// have no predecessors.
252 VPBlockBase *getEnclosingBlockWithPredecessors();
253
254 /// \return the successors either attached directly to this VPBlockBase or, if
255 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
256 /// successors of its own, search recursively for the first enclosing
257 /// VPRegionBlock that has successors and return them. If no such
258 /// VPRegionBlock exists, return the (empty) successors of the topmost
259 /// VPBlockBase reached.
261 return getEnclosingBlockWithSuccessors()->getSuccessors();
262 }
263
264 /// \return the hierarchical successor of this VPBlockBase if it has a single
265 /// hierarchical successor. Otherwise return a null pointer.
267 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
268 }
269
270 /// \return the predecessors either attached directly to this VPBlockBase or,
271 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
272 /// predecessors of its own, search recursively for the first enclosing
273 /// VPRegionBlock that has predecessors and return them. If no such
274 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
275 /// VPBlockBase reached.
277 return getEnclosingBlockWithPredecessors()->getPredecessors();
278 }
279
280 /// \return the hierarchical predecessor of this VPBlockBase if it has a
281 /// single hierarchical predecessor. Otherwise return a null pointer.
285
286 /// Set a given VPBlockBase \p Successor as the single successor of this
287 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
288 /// This VPBlockBase must have no successors.
290 assert(Successors.empty() && "Setting one successor when others exist.");
291 assert(Successor->getParent() == getParent() &&
292 "connected blocks must have the same parent");
293 appendSuccessor(Successor);
294 }
295
 296 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
 297 /// successors of this VPBlockBase. This VPBlockBase is not added as
 298 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
 299 /// successors.
 /// NOTE(review): unlike the single-successor setter, there is no
 /// same-parent assertion here — presumably intentional for conditional
 /// branches whose targets may cross region boundaries; confirm.
 300 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
 301 assert(Successors.empty() && "Setting two successors when others exist.");
 302 appendSuccessor(IfTrue);
 303 appendSuccessor(IfFalse);
 304 }
305
306 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
307 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
308 /// as successor of any VPBasicBlock in \p NewPreds.
310 assert(Predecessors.empty() && "Block predecessors already set.");
311 for (auto *Pred : NewPreds)
312 appendPredecessor(Pred);
313 }
314
 315 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
316 /// This VPBlockBase must have no successors. This VPBlockBase is not added
317 /// as predecessor of any VPBasicBlock in \p NewSuccs.
319 assert(Successors.empty() && "Block successors already set.");
320 for (auto *Succ : NewSuccs)
321 appendSuccessor(Succ);
322 }
323
 324 /// Remove all the predecessors of this block.
325 void clearPredecessors() { Predecessors.clear(); }
326
327 /// Remove all the successors of this block.
328 void clearSuccessors() { Successors.clear(); }
329
330 /// Swap predecessors of the block. The block must have exactly 2
331 /// predecessors.
333 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
334 std::swap(Predecessors[0], Predecessors[1]);
335 }
336
337 /// Swap successors of the block. The block must have exactly 2 successors.
338 // TODO: This should be part of introducing conditional branch recipes rather
339 // than being independent.
341 assert(Successors.size() == 2 && "must have 2 successors to swap");
342 std::swap(Successors[0], Successors[1]);
343 }
344
345 /// Returns the index for \p Pred in the blocks predecessors list.
346 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
347 assert(count(Predecessors, Pred) == 1 &&
348 "must have Pred exactly once in Predecessors");
349 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
350 }
351
352 /// Returns the index for \p Succ in the blocks successor list.
353 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
354 assert(count(Successors, Succ) == 1 &&
355 "must have Succ exactly once in Successors");
356 return std::distance(Successors.begin(), find(Successors, Succ));
357 }
358
359 /// The method which generates the output IR that correspond to this
360 /// VPBlockBase, thereby "executing" the VPlan.
361 virtual void execute(VPTransformState *State) = 0;
362
363 /// Return the cost of the block.
365
366#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 /// Print the block's name to \p OS, for use when this block appears as an
 /// operand. \p PrintType is accepted for interface compatibility but is
 /// ignored by this implementation.
 367 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
 368 OS << getName();
 369 }
370
371 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
372 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
 373 /// consecutive numbers.
374 ///
375 /// Note that the numbering is applied to the whole VPlan, so printing
376 /// individual blocks is consistent with the whole VPlan printing.
377 virtual void print(raw_ostream &O, const Twine &Indent,
378 VPSlotTracker &SlotTracker) const = 0;
379
380 /// Print plain-text dump of this VPlan to \p O.
381 void print(raw_ostream &O) const;
382
383 /// Print the successors of this block to \p O, prefixing all lines with \p
384 /// Indent.
385 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
386
387 /// Dump this VPBlockBase to dbgs().
388 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
389#endif
390
391 /// Clone the current block and it's recipes without updating the operands of
392 /// the cloned recipes, including all blocks in the single-entry single-exit
393 /// region for VPRegionBlocks.
394 virtual VPBlockBase *clone() = 0;
395};
396
397/// VPRecipeBase is a base class modeling a sequence of one or more output IR
398/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
399/// and is responsible for deleting its defined values. Single-value
400/// recipes must inherit from VPSingleDef instead of inheriting from both
401/// VPRecipeBase and VPValue separately.
403 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
404 public VPDef,
405 public VPUser {
406 friend VPBasicBlock;
407 friend class VPBlockUtils;
408
409 /// Subclass identifier (for isa/dyn_cast).
410 const unsigned char SubclassID;
411
412 /// Each VPRecipe belongs to a single VPBasicBlock.
413 VPBasicBlock *Parent = nullptr;
414
415 /// The debug location for the recipe.
416 DebugLoc DL;
417
418public:
419 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
420 /// that is actually instantiated. Values of this enumeration are kept in the
421 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
422 /// type identification.
423 using VPRecipeTy = enum {
424 VPBranchOnMaskSC,
425 VPDerivedIVSC,
426 VPExpandSCEVSC,
427 VPExpressionSC,
428 VPIRInstructionSC,
429 VPInstructionSC,
430 VPInterleaveEVLSC,
431 VPInterleaveSC,
432 VPReductionEVLSC,
433 VPReductionSC,
434 VPReplicateSC,
435 VPScalarIVStepsSC,
436 VPVectorPointerSC,
437 VPVectorEndPointerSC,
438 VPWidenCallSC,
439 VPWidenCanonicalIVSC,
440 VPWidenCastSC,
441 VPWidenGEPSC,
442 VPWidenIntrinsicSC,
443 VPWidenLoadEVLSC,
444 VPWidenLoadSC,
445 VPWidenStoreEVLSC,
446 VPWidenStoreSC,
447 VPWidenSC,
448 VPBlendSC,
449 VPHistogramSC,
450 // START: Phi-like recipes. Need to be kept together.
451 VPWidenPHISC,
452 VPPredInstPHISC,
453 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
454 // VPHeaderPHIRecipe need to be kept together.
455 VPCurrentIterationPHISC,
456 VPActiveLaneMaskPHISC,
457 VPFirstOrderRecurrencePHISC,
458 VPWidenIntOrFpInductionSC,
459 VPWidenPointerInductionSC,
460 VPReductionPHISC,
461 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
462 // END: Phi-like recipes
463 VPFirstPHISC = VPWidenPHISC,
464 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
465 VPLastHeaderPHISC = VPReductionPHISC,
466 VPLastPHISC = VPReductionPHISC,
467 };
468
469 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
471 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
472
473 ~VPRecipeBase() override = default;
474
475 /// Clone the current recipe.
476 virtual VPRecipeBase *clone() = 0;
477
478 /// \return the VPBasicBlock which this VPRecipe belongs to.
479 VPBasicBlock *getParent() { return Parent; }
480 const VPBasicBlock *getParent() const { return Parent; }
481
482 /// \return the VPRegionBlock which the recipe belongs to.
483 VPRegionBlock *getRegion();
484 const VPRegionBlock *getRegion() const;
485
486 /// The method which generates the output IR instructions that correspond to
487 /// this VPRecipe, thereby "executing" the VPlan.
488 virtual void execute(VPTransformState &State) = 0;
489
490 /// Return the cost of this recipe, taking into account if the cost
491 /// computation should be skipped and the ForceTargetInstructionCost flag.
492 /// Also takes care of printing the cost for debugging.
494
495 /// Insert an unlinked recipe into a basic block immediately before
496 /// the specified recipe.
497 void insertBefore(VPRecipeBase *InsertPos);
498 /// Insert an unlinked recipe into \p BB immediately before the insertion
499 /// point \p IP;
500 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
501
502 /// Insert an unlinked Recipe into a basic block immediately after
503 /// the specified Recipe.
504 void insertAfter(VPRecipeBase *InsertPos);
505
506 /// Unlink this recipe from its current VPBasicBlock and insert it into
507 /// the VPBasicBlock that MovePos lives in, right after MovePos.
508 void moveAfter(VPRecipeBase *MovePos);
509
510 /// Unlink this recipe and insert into BB before I.
511 ///
512 /// \pre I is a valid iterator into BB.
513 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
514
515 /// This method unlinks 'this' from the containing basic block, but does not
516 /// delete it.
517 void removeFromParent();
518
519 /// This method unlinks 'this' from the containing basic block and deletes it.
520 ///
521 /// \returns an iterator pointing to the element after the erased one
523
524 /// \return an ID for the concrete type of this object.
525 unsigned getVPRecipeID() const { return SubclassID; }
526
527 /// Method to support type inquiry through isa, cast, and dyn_cast.
528 static inline bool classof(const VPDef *D) {
529 // All VPDefs are also VPRecipeBases.
530 return true;
531 }
532
533 static inline bool classof(const VPUser *U) { return true; }
534
535 /// Returns true if the recipe may have side-effects.
536 bool mayHaveSideEffects() const;
537
538 /// Return true if we can safely execute this recipe unconditionally even if
539 /// it is masked originally.
540 bool isSafeToSpeculativelyExecute() const;
541
542 /// Returns true for PHI-like recipes.
543 bool isPhi() const;
544
545 /// Returns true if the recipe may read from memory.
546 bool mayReadFromMemory() const;
547
548 /// Returns true if the recipe may write to memory.
549 bool mayWriteToMemory() const;
550
551 /// Returns true if the recipe may read from or write to memory.
552 bool mayReadOrWriteMemory() const {
554 }
555
556 /// Returns the debug location of the recipe.
557 DebugLoc getDebugLoc() const { return DL; }
558
559 /// Return true if the recipe is a scalar cast.
560 bool isScalarCast() const;
561
562 /// Set the recipe's debug location to \p NewDL.
563 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
564
565#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
566 /// Dump the recipe to stderr (for debugging).
567 LLVM_ABI_FOR_TEST void dump() const;
568
569 /// Print the recipe, delegating to printRecipe().
570 void print(raw_ostream &O, const Twine &Indent,
572#endif
573
574protected:
575 /// Compute the cost of this recipe either using a recipe's specialized
576 /// implementation or using the legacy cost model and the underlying
577 /// instructions.
578 virtual InstructionCost computeCost(ElementCount VF,
579 VPCostContext &Ctx) const;
580
581#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
582 /// Each concrete VPRecipe prints itself, without printing common information,
583 /// like debug info or metadata.
584 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
585 VPSlotTracker &SlotTracker) const = 0;
586#endif
587};
588
589// Helper macro to define common classof implementations for recipes.
590#define VP_CLASSOF_IMPL(VPRecipeID) \
591 static inline bool classof(const VPRecipeBase *R) { \
592 return R->getVPRecipeID() == VPRecipeID; \
593 } \
594 static inline bool classof(const VPValue *V) { \
595 auto *R = V->getDefiningRecipe(); \
596 return R && R->getVPRecipeID() == VPRecipeID; \
597 } \
598 static inline bool classof(const VPUser *U) { \
599 auto *R = dyn_cast<VPRecipeBase>(U); \
600 return R && R->getVPRecipeID() == VPRecipeID; \
601 } \
602 static inline bool classof(const VPSingleDefRecipe *R) { \
603 return R->getVPRecipeID() == VPRecipeID; \
604 }
605
606/// VPSingleDef is a base class for recipes for modeling a sequence of one or
607/// more output IR that define a single result VPValue.
608/// Note that VPRecipeBase must be inherited from before VPValue.
610public:
611 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
613 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this) {}
614
615 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
617 : VPRecipeBase(SC, Operands, DL), VPRecipeValue(this, UV) {}
618
 /// Support for isa/dyn_cast: return true iff \p R's recipe kind is one that
 /// defines a single result VPValue (i.e. inherits VPSingleDefRecipe). The
 /// switch deliberately has no default case, so adding a new VPRecipeTy
 /// value produces a compiler warning until it is classified here; the
 /// trailing llvm_unreachable guards against unclassified IDs at runtime.
 619 static inline bool classof(const VPRecipeBase *R) {
 620 switch (R->getVPRecipeID()) {
 621 case VPRecipeBase::VPDerivedIVSC:
 622 case VPRecipeBase::VPExpandSCEVSC:
 623 case VPRecipeBase::VPExpressionSC:
 624 case VPRecipeBase::VPInstructionSC:
 625 case VPRecipeBase::VPReductionEVLSC:
 626 case VPRecipeBase::VPReductionSC:
 627 case VPRecipeBase::VPReplicateSC:
 628 case VPRecipeBase::VPScalarIVStepsSC:
 629 case VPRecipeBase::VPVectorPointerSC:
 630 case VPRecipeBase::VPVectorEndPointerSC:
 631 case VPRecipeBase::VPWidenCallSC:
 632 case VPRecipeBase::VPWidenCanonicalIVSC:
 633 case VPRecipeBase::VPWidenCastSC:
 634 case VPRecipeBase::VPWidenGEPSC:
 635 case VPRecipeBase::VPWidenIntrinsicSC:
 636 case VPRecipeBase::VPWidenSC:
 637 case VPRecipeBase::VPBlendSC:
 638 case VPRecipeBase::VPPredInstPHISC:
 639 case VPRecipeBase::VPCurrentIterationPHISC:
 640 case VPRecipeBase::VPActiveLaneMaskPHISC:
 641 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
 642 case VPRecipeBase::VPWidenPHISC:
 643 case VPRecipeBase::VPWidenIntOrFpInductionSC:
 644 case VPRecipeBase::VPWidenPointerInductionSC:
 645 case VPRecipeBase::VPReductionPHISC:
 646 return true;
 647 case VPRecipeBase::VPBranchOnMaskSC:
 648 case VPRecipeBase::VPInterleaveEVLSC:
 649 case VPRecipeBase::VPInterleaveSC:
 650 case VPRecipeBase::VPIRInstructionSC:
 651 case VPRecipeBase::VPWidenLoadEVLSC:
 652 case VPRecipeBase::VPWidenLoadSC:
 653 case VPRecipeBase::VPWidenStoreEVLSC:
 654 case VPRecipeBase::VPWidenStoreSC:
 655 case VPRecipeBase::VPHistogramSC:
 656 // TODO: Widened stores don't define a value, but widened loads do. Split
 657 // the recipes to be able to make widened loads VPSingleDefRecipes.
 658 return false;
 659 }
 660 llvm_unreachable("Unhandled VPRecipeID");
 661 }
662
663 static inline bool classof(const VPValue *V) {
664 auto *R = V->getDefiningRecipe();
665 return R && classof(R);
666 }
667
668 static inline bool classof(const VPUser *U) {
669 auto *R = dyn_cast<VPRecipeBase>(U);
670 return R && classof(R);
671 }
672
673 VPSingleDefRecipe *clone() override = 0;
674
675 /// Returns the underlying instruction.
682
683#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
684 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
686#endif
687};
688
689/// Class to record and manage LLVM IR flags.
692 enum class OperationType : unsigned char {
693 Cmp,
694 FCmp,
695 OverflowingBinOp,
696 Trunc,
697 DisjointOp,
698 PossiblyExactOp,
699 GEPOp,
700 FPMathOp,
701 NonNegOp,
702 ReductionOp,
703 Other
704 };
705
706public:
707 struct WrapFlagsTy {
708 char HasNUW : 1;
709 char HasNSW : 1;
710
712 };
713
715 char HasNUW : 1;
716 char HasNSW : 1;
717
719 };
720
725
727 char NonNeg : 1;
728 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
729 };
730
731private:
732 struct ExactFlagsTy {
733 char IsExact : 1;
734 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
735 };
736 struct FastMathFlagsTy {
737 char AllowReassoc : 1;
738 char NoNaNs : 1;
739 char NoInfs : 1;
740 char NoSignedZeros : 1;
741 char AllowReciprocal : 1;
742 char AllowContract : 1;
743 char ApproxFunc : 1;
744
745 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
746 };
747 /// Holds both the predicate and fast-math flags for floating-point
748 /// comparisons.
749 struct FCmpFlagsTy {
750 uint8_t CmpPredStorage;
751 FastMathFlagsTy FMFs;
752 };
753 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
754 struct ReductionFlagsTy {
755 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
756 // additional kinds.
757 unsigned char Kind : 6;
758 // TODO: Derive order/in-loop from plan and remove here.
759 unsigned char IsOrdered : 1;
760 unsigned char IsInLoop : 1;
761 FastMathFlagsTy FMFs;
762
763 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
764 FastMathFlags FMFs)
765 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
766 IsInLoop(IsInLoop), FMFs(FMFs) {}
767 };
768
769 OperationType OpType;
770
771 union {
776 ExactFlagsTy ExactFlags;
779 FastMathFlagsTy FMFs;
780 FCmpFlagsTy FCmpFlags;
781 ReductionFlagsTy ReductionFlags;
783 };
784
785public:
786 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
787
789 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
790 OpType = OperationType::FCmp;
792 FCmp->getPredicate());
793 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
794 FCmpFlags.FMFs = FCmp->getFastMathFlags();
795 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
796 OpType = OperationType::Cmp;
798 Op->getPredicate());
799 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
800 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
801 OpType = OperationType::DisjointOp;
802 DisjointFlags.IsDisjoint = Op->isDisjoint();
803 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
804 OpType = OperationType::OverflowingBinOp;
805 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
806 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
807 OpType = OperationType::Trunc;
808 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
809 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
810 OpType = OperationType::PossiblyExactOp;
811 ExactFlags.IsExact = Op->isExact();
812 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
813 OpType = OperationType::GEPOp;
814 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
815 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
816 "wrap flags truncated");
817 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
818 OpType = OperationType::NonNegOp;
819 NonNegFlags.NonNeg = PNNI->hasNonNeg();
820 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
821 OpType = OperationType::FPMathOp;
822 FMFs = Op->getFastMathFlags();
823 }
824 }
825
826 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
828 assert(getPredicate() == Pred && "predicate truncated");
829 }
830
832 : OpType(OperationType::FCmp), AllFlags() {
834 assert(getPredicate() == Pred && "predicate truncated");
835 FCmpFlags.FMFs = FMFs;
836 }
837
839 : OpType(OperationType::OverflowingBinOp), AllFlags() {
840 this->WrapFlags = WrapFlags;
841 }
842
844 : OpType(OperationType::Trunc), AllFlags() {
845 this->TruncFlags = TruncFlags;
846 }
847
848 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
849 this->FMFs = FMFs;
850 }
851
853 : OpType(OperationType::DisjointOp), AllFlags() {
854 this->DisjointFlags = DisjointFlags;
855 }
856
858 : OpType(OperationType::NonNegOp), AllFlags() {
859 this->NonNegFlags = NonNegFlags;
860 }
861
862 VPIRFlags(ExactFlagsTy ExactFlags)
863 : OpType(OperationType::PossiblyExactOp), AllFlags() {
864 this->ExactFlags = ExactFlags;
865 }
866
868 : OpType(OperationType::GEPOp), AllFlags() {
869 GEPFlagsStorage = GEPFlags.getRaw();
870 }
871
872 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
873 : OpType(OperationType::ReductionOp), AllFlags() {
874 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
875 }
876
878 OpType = Other.OpType;
879 AllFlags[0] = Other.AllFlags[0];
880 AllFlags[1] = Other.AllFlags[1];
881 }
882
883 /// Only keep flags also present in \p Other. \p Other must have the same
884 /// OpType as the current object.
885 void intersectFlags(const VPIRFlags &Other);
886
887 /// Drop all poison-generating flags.
889 // NOTE: This needs to be kept in-sync with
890 // Instruction::dropPoisonGeneratingFlags.
891 switch (OpType) {
892 case OperationType::OverflowingBinOp:
893 WrapFlags.HasNUW = false;
894 WrapFlags.HasNSW = false;
895 break;
896 case OperationType::Trunc:
897 TruncFlags.HasNUW = false;
898 TruncFlags.HasNSW = false;
899 break;
900 case OperationType::DisjointOp:
901 DisjointFlags.IsDisjoint = false;
902 break;
903 case OperationType::PossiblyExactOp:
904 ExactFlags.IsExact = false;
905 break;
906 case OperationType::GEPOp:
907 GEPFlagsStorage = 0;
908 break;
909 case OperationType::FPMathOp:
910 case OperationType::FCmp:
911 case OperationType::ReductionOp:
912 getFMFsRef().NoNaNs = false;
913 getFMFsRef().NoInfs = false;
914 break;
915 case OperationType::NonNegOp:
916 NonNegFlags.NonNeg = false;
917 break;
918 case OperationType::Cmp:
919 case OperationType::Other:
920 break;
921 }
922 }
923
924 /// Apply the IR flags to \p I.
925 void applyFlags(Instruction &I) const {
926 switch (OpType) {
927 case OperationType::OverflowingBinOp:
928 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
929 I.setHasNoSignedWrap(WrapFlags.HasNSW);
930 break;
931 case OperationType::Trunc:
932 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
933 I.setHasNoSignedWrap(TruncFlags.HasNSW);
934 break;
935 case OperationType::DisjointOp:
936 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
937 break;
938 case OperationType::PossiblyExactOp:
939 I.setIsExact(ExactFlags.IsExact);
940 break;
941 case OperationType::GEPOp:
942 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
944 break;
945 case OperationType::FPMathOp:
946 case OperationType::FCmp: {
947 const FastMathFlagsTy &F = getFMFsRef();
948 I.setHasAllowReassoc(F.AllowReassoc);
949 I.setHasNoNaNs(F.NoNaNs);
950 I.setHasNoInfs(F.NoInfs);
951 I.setHasNoSignedZeros(F.NoSignedZeros);
952 I.setHasAllowReciprocal(F.AllowReciprocal);
953 I.setHasAllowContract(F.AllowContract);
954 I.setHasApproxFunc(F.ApproxFunc);
955 break;
956 }
957 case OperationType::NonNegOp:
958 I.setNonNeg(NonNegFlags.NonNeg);
959 break;
960 case OperationType::ReductionOp:
961 llvm_unreachable("reduction ops should not use applyFlags");
962 case OperationType::Cmp:
963 case OperationType::Other:
964 break;
965 }
966 }
967
969 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
970 "recipe doesn't have a compare predicate");
971 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
974 }
975
977 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
978 "recipe doesn't have a compare predicate");
979 if (OpType == OperationType::FCmp)
981 else
983 assert(getPredicate() == Pred && "predicate truncated");
984 }
985
989
990 /// Returns true if the recipe has a comparison predicate.
991 bool hasPredicate() const {
992 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
993 }
994
995 /// Returns true if the recipe has fast-math flags.
996 bool hasFastMathFlags() const {
997 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
998 OpType == OperationType::ReductionOp;
999 }
1000
1002
1003 /// Returns true if the recipe has non-negative flag.
1004 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1005
1006 bool isNonNeg() const {
1007 assert(OpType == OperationType::NonNegOp &&
1008 "recipe doesn't have a NNEG flag");
1009 return NonNegFlags.NonNeg;
1010 }
1011
1012 bool hasNoUnsignedWrap() const {
1013 switch (OpType) {
1014 case OperationType::OverflowingBinOp:
1015 return WrapFlags.HasNUW;
1016 case OperationType::Trunc:
1017 return TruncFlags.HasNUW;
1018 default:
1019 llvm_unreachable("recipe doesn't have a NUW flag");
1020 }
1021 }
1022
1023 bool hasNoSignedWrap() const {
1024 switch (OpType) {
1025 case OperationType::OverflowingBinOp:
1026 return WrapFlags.HasNSW;
1027 case OperationType::Trunc:
1028 return TruncFlags.HasNSW;
1029 default:
1030 llvm_unreachable("recipe doesn't have a NSW flag");
1031 }
1032 }
1033
1034 bool hasNoWrapFlags() const {
1035 switch (OpType) {
1036 case OperationType::OverflowingBinOp:
1037 case OperationType::Trunc:
1038 return true;
1039 default:
1040 return false;
1041 }
1042 }
1043
1045 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1046 }
1047
1048 bool isDisjoint() const {
1049 assert(OpType == OperationType::DisjointOp &&
1050 "recipe cannot have a disjoing flag");
1051 return DisjointFlags.IsDisjoint;
1052 }
1053
1055 assert(OpType == OperationType::ReductionOp &&
1056 "recipe doesn't have reduction flags");
1057 return static_cast<RecurKind>(ReductionFlags.Kind);
1058 }
1059
1060 bool isReductionOrdered() const {
1061 assert(OpType == OperationType::ReductionOp &&
1062 "recipe doesn't have reduction flags");
1063 return ReductionFlags.IsOrdered;
1064 }
1065
1066 bool isReductionInLoop() const {
1067 assert(OpType == OperationType::ReductionOp &&
1068 "recipe doesn't have reduction flags");
1069 return ReductionFlags.IsInLoop;
1070 }
1071
1072private:
1073 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1074 FastMathFlagsTy &getFMFsRef() {
1075 if (OpType == OperationType::FCmp)
1076 return FCmpFlags.FMFs;
1077 if (OpType == OperationType::ReductionOp)
1078 return ReductionFlags.FMFs;
1079 return FMFs;
1080 }
1081 const FastMathFlagsTy &getFMFsRef() const {
1082 if (OpType == OperationType::FCmp)
1083 return FCmpFlags.FMFs;
1084 if (OpType == OperationType::ReductionOp)
1085 return ReductionFlags.FMFs;
1086 return FMFs;
1087 }
1088
1089public:
1090 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1091 /// otherwise. Opcodes not supporting default flags include compares and
1092 /// ComputeReductionResult.
1093 static VPIRFlags getDefaultFlags(unsigned Opcode);
1094
1095#if !defined(NDEBUG)
1096 /// Returns true if the set flags are valid for \p Opcode.
1097 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1098
1099 /// Returns true if \p Opcode has its required flags set.
1100 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1101#endif
1102
1103#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1104 void printFlags(raw_ostream &O) const;
1105#endif
1106};
1108
1109static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1110
1111/// A pure-virtual common base class for recipes defining a single VPValue and
1112/// using IR flags.
1114 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1115 const VPIRFlags &Flags,
1117 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1118
1119 static inline bool classof(const VPRecipeBase *R) {
1120 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1121 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1122 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1123 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1124 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1125 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1126 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1127 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1128 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1129 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1130 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1131 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC;
1132 }
1133
1134 static inline bool classof(const VPUser *U) {
1135 auto *R = dyn_cast<VPRecipeBase>(U);
1136 return R && classof(R);
1137 }
1138
1139 static inline bool classof(const VPValue *V) {
1140 auto *R = V->getDefiningRecipe();
1141 return R && classof(R);
1142 }
1143
1145
1146 static inline bool classof(const VPSingleDefRecipe *R) {
1147 return classof(static_cast<const VPRecipeBase *>(R));
1148 }
1149
1150 void execute(VPTransformState &State) override = 0;
1151
1152 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1154 VPCostContext &Ctx) const;
1155};
1156
/// Helper to access the operand that contains the unroll part for this recipe
/// after unrolling. \p PartOpIdx is presumably the operand index at which the
/// unroll-part operand is expected after unrolling adds it — confirm against
/// the out-of-line definitions.
template <unsigned PartOpIdx> class LLVM_ABI_FOR_TEST VPUnrollPartAccessor {
protected:
  /// Return the VPValue operand containing the unroll part or null if there is
  /// no such operand.
  VPValue *getUnrollPartOperand(const VPUser &U) const;

  /// Return the unroll part.
  unsigned getUnrollPart(const VPUser &U) const;
};
1168
1169/// Helper to manage IR metadata for recipes. It filters out metadata that
1170/// cannot be propagated.
1173
1174public:
1175 VPIRMetadata() = default;
1176
1177 /// Adds metatadata that can be preserved from the original instruction
1178 /// \p I.
1180
1181 /// Copy constructor for cloning.
1183
1185
1186 /// Add all metadata to \p I.
1187 void applyMetadata(Instruction &I) const;
1188
1189 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1190 /// already exists, it will be replaced. Otherwise, it will be added.
1191 void setMetadata(unsigned Kind, MDNode *Node) {
1192 auto It =
1193 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1194 return P.first == Kind;
1195 });
1196 if (It != Metadata.end())
1197 It->second = Node;
1198 else
1199 Metadata.emplace_back(Kind, Node);
1200 }
1201
1202 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1203 /// nodes that are common to both.
1204 void intersect(const VPIRMetadata &MD);
1205
1206 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1207 MDNode *getMetadata(unsigned Kind) const {
1208 auto It =
1209 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1210 return It != Metadata.end() ? It->second : nullptr;
1211 }
1212
1213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1214 /// Print metadata with node IDs.
1215 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1216#endif
1217};
1218
1219/// This is a concrete Recipe that models a single VPlan-level instruction.
1220/// While as any Recipe it may generate a sequence of IR instructions when
1221/// executed, these instructions would always form a single-def expression as
1222/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1223/// opcodes can take an optional mask. Masks may be assigned during
1224/// predication.
1226 public VPIRMetadata {
1227public:
1228 /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
1229 enum {
1231 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1232 // values of a first-order recurrence.
1234 // Creates a mask where each lane is active (true) whilst the current
1235 // counter (first operand + index) is less than the second operand. i.e.
1236 // mask[i] = icmpt ult (op0 + i), op1
1237 // The size of the mask returned is VF * Multiplier (UF, third op).
1241 // Increment the canonical IV separately for each unrolled part.
1243 // Abstract instruction that compares two values and branches. This is
1244 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1247 // Branch with 2 boolean condition operands and 3 successors. If condition
1248 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1249 // successor 1; otherwise branches to successor 2. Expanded after region
1250 // dissolution into: (1) an OR of the two conditions branching to
1251 // middle.split or successor 2, and (2) middle.split branching to successor
1252 // 0 or successor 1 based on condition 0.
1255 /// Given operands of (the same) struct type, creates a struct of fixed-
1256 /// width vectors each containing a struct field of all operands. The
1257 /// number of operands matches the element count of every vector.
1259 /// Creates a fixed-width vector containing all operands. The number of
1260 /// operands matches the vector element count.
1262 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1263 /// abstract VPInstruction whose single defined VPValue represents VF
1264 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1265 /// VPInstructions.
1267 /// Reduce the operands to the final reduction result using the operation
1268 /// specified via the operation's VPIRFlags.
1270 // Extracts the last part of its operand. Removed during unrolling.
1272 // Extracts the last lane of its vector operand, per part.
1274 // Extracts the second-to-last lane from its operand or the second-to-last
1275 // part if it is scalar. In the latter case, the recipe will be removed
1276 // during unrolling.
1278 LogicalAnd, // Non-poison propagating logical And.
1279 LogicalOr, // Non-poison propagating logical Or.
1280 // Add an offset in bytes (second operand) to a base pointer (first
1281 // operand). Only generates scalar values (either for the first lane only or
1282 // for all lanes, depending on its uses).
1284 // Add a vector offset in bytes (second operand) to a scalar base pointer
1285 // (first operand).
1287 // Returns a scalar boolean value, which is true if any lane of its
1288 // (boolean) vector operands is true. It produces the reduced value across
1289 // all unrolled iterations. Unrolling will add all copies of its original
1290 // operand as additional operands. AnyOf is poison-safe as all operands
1291 // will be frozen.
1293 // Calculates the first active lane index of the vector predicate operands.
1294 // It produces the lane index across all unrolled iterations. Unrolling will
1295 // add all copies of its original operand as additional operands.
1296 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1297 // result even with operands that are all zeroes.
1299 // Calculates the last active lane index of the vector predicate operands.
1300 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1301 // tail-folding to extract the correct live-out value from the last active
1302 // iteration. It produces the lane index across all unrolled iterations.
1303 // Unrolling will add all copies of its original operand as additional
1304 // operands.
1306 // Returns a reversed vector for the operand.
1308
1309 // The opcodes below are used for VPInstructionWithType.
1310 //
1311 /// Scale the first operand (vector step) by the second operand
1312 /// (scalar-step). Casts both operands to the result type if needed.
1314 /// Start vector for reductions with 3 operands: the original start value,
1315 /// the identity value for the reduction and an integer indicating the
1316 /// scaling factor.
1318 // Creates a step vector starting from 0 to VF with a step of 1.
1320 /// Extracts a single lane (first operand) from a set of vector operands.
1321 /// The lane specifies an index into a vector formed by combining all vector
1322 /// operands (all operands after the first one).
1324 /// Explicit user for the resume phi of the canonical induction in the main
1325 /// VPlan, used by the epilogue vector loop.
1327 /// Extracts the last active lane from a set of vectors. The first operand
1328 /// is the default value if no lanes in the masks are active. Conceptually,
1329 /// this concatenates all data vectors (odd operands), concatenates all
1330 /// masks (even operands -- ignoring the default value), and returns the
1331 /// last active value from the combined data vector using the combined mask.
1333
1334 /// Returns the value for vscale.
1336 /// Compute the exiting value of a wide induction after vectorization, that
1337 /// is the value of the last lane of the induction increment (i.e. its
1338 /// backedge value). Has the wide induction recipe as operand.
1342 };
1343
1344 /// Returns true if this VPInstruction generates scalar values for all lanes.
1345 /// Most VPInstructions generate a single value per part, either vector or
1346 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1347 /// values per all lanes, stemming from an original ingredient. This method
1348 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1349 /// underlying ingredient.
1350 bool doesGeneratePerAllLanes() const;
1351
1352 /// Return the number of operands determined by the opcode of the
1353 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1354 /// cannot be determined directly by the opcode.
1355 unsigned getNumOperandsForOpcode() const;
1356
1357private:
1358 typedef unsigned char OpcodeTy;
1359 OpcodeTy Opcode;
1360
1361 /// An optional name that can be used for the generated IR instruction.
1362 std::string Name;
1363
1364 /// Returns true if we can generate a scalar for the first lane only if
1365 /// needed.
1366 bool canGenerateScalarForFirstLane() const;
1367
1368 /// Utility methods serving execute(): generates a single vector instance of
1369 /// the modeled instruction. \returns the generated value. . In some cases an
1370 /// existing value is returned rather than a generated one.
1371 Value *generate(VPTransformState &State);
1372
1373 /// Returns true if the VPInstruction does not need masking.
1374 bool alwaysUnmasked() const {
1375 if (Opcode == VPInstruction::MaskedCond)
1376 return false;
1377
1378 // For now only VPInstructions with underlying values use masks.
1379 // TODO: provide masks to VPInstructions w/o underlying values.
1380 if (!getUnderlyingValue())
1381 return true;
1382
1383 return Opcode == Instruction::PHI || Opcode == Instruction::GetElementPtr;
1384 }
1385
1386public:
1387 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1388 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1389 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1390
1391 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1392
1393 VPInstruction *clone() override {
1394 auto *New = new VPInstruction(Opcode, operands(), *this, *this,
1395 getDebugLoc(), Name);
1396 if (getUnderlyingValue())
1397 New->setUnderlyingValue(getUnderlyingInstr());
1398 return New;
1399 }
1400
1401 unsigned getOpcode() const { return Opcode; }
1402
1403 /// Generate the instruction.
1404 /// TODO: We currently execute only per-part unless a specific instance is
1405 /// provided.
1406 void execute(VPTransformState &State) override;
1407
1408 /// Return the cost of this VPInstruction.
1409 InstructionCost computeCost(ElementCount VF,
1410 VPCostContext &Ctx) const override;
1411
1412#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1413 /// Print the VPInstruction to dbgs() (for debugging).
1414 LLVM_DUMP_METHOD void dump() const;
1415#endif
1416
1417 bool hasResult() const {
1418 // CallInst may or may not have a result, depending on the called function.
1419 // Conservatively return calls have results for now.
1420 switch (getOpcode()) {
1421 case Instruction::Ret:
1422 case Instruction::UncondBr:
1423 case Instruction::CondBr:
1424 case Instruction::Store:
1425 case Instruction::Switch:
1426 case Instruction::IndirectBr:
1427 case Instruction::Resume:
1428 case Instruction::CatchRet:
1429 case Instruction::Unreachable:
1430 case Instruction::Fence:
1431 case Instruction::AtomicRMW:
1435 return false;
1436 default:
1437 return true;
1438 }
1439 }
1440
1441 /// Returns true if the VPInstruction has a mask operand.
1442 bool isMasked() const {
1443 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1444 // VPInstructions without a fixed number of operands cannot be masked.
1445 if (NumOpsForOpcode == -1u)
1446 return false;
1447 return NumOpsForOpcode + 1 == getNumOperands();
1448 }
1449
1450 /// Returns the number of operands, excluding the mask if the VPInstruction is
1451 /// masked.
1452 unsigned getNumOperandsWithoutMask() const {
1453 return getNumOperands() - isMasked();
1454 }
1455
1456 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1457 void addMask(VPValue *Mask) {
1458 assert(!isMasked() && "recipe is already masked");
1459 if (alwaysUnmasked())
1460 return;
1461 addOperand(Mask);
1462 }
1463
1464 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1465 /// VPInstructions.
1466 VPValue *getMask() const {
1467 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1468 }
1469
1470 /// Returns an iterator range over the operands excluding the mask operand
1471 /// if present.
1478
1479 /// Returns true if the underlying opcode may read from or write to memory.
1480 bool opcodeMayReadOrWriteFromMemory() const;
1481
1482 /// Returns true if the recipe only uses the first lane of operand \p Op.
1483 bool usesFirstLaneOnly(const VPValue *Op) const override;
1484
1485 /// Returns true if the recipe only uses the first part of operand \p Op.
1486 bool usesFirstPartOnly(const VPValue *Op) const override;
1487
1488 /// Returns true if this VPInstruction produces a scalar value from a vector,
1489 /// e.g. by performing a reduction or extracting a lane.
1490 bool isVectorToScalar() const;
1491
1492 /// Returns true if this VPInstruction's operands are single scalars and the
1493 /// result is also a single scalar.
1494 bool isSingleScalar() const;
1495
1496 /// Returns the symbolic name assigned to the VPInstruction.
1497 StringRef getName() const { return Name; }
1498
1499 /// Set the symbolic name for the VPInstruction.
1500 void setName(StringRef NewName) { Name = NewName.str(); }
1501
1502protected:
1503#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1504 /// Print the VPInstruction to \p O.
1505 void printRecipe(raw_ostream &O, const Twine &Indent,
1506 VPSlotTracker &SlotTracker) const override;
1507#endif
1508};
1509
1510/// A specialization of VPInstruction augmenting it with a dedicated result
1511/// type, to be used when the opcode and operands of the VPInstruction don't
1512/// directly determine the result type. Note that there is no separate recipe ID
1513/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1514/// distinguished purely by the opcode.
1516 /// Scalar result type produced by the recipe.
1517 Type *ResultTy;
1518
1519public:
1521 Type *ResultTy, const VPIRFlags &Flags = {},
1522 const VPIRMetadata &Metadata = {},
1524 const Twine &Name = "")
1525 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1526 ResultTy(ResultTy) {}
1527
1528 static inline bool classof(const VPRecipeBase *R) {
1529 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1530 // type information.
1531 if (R->isScalarCast())
1532 return true;
1533 auto *VPI = dyn_cast<VPInstruction>(R);
1534 if (!VPI)
1535 return false;
1536 switch (VPI->getOpcode()) {
1540 case Instruction::Load:
1541 return true;
1542 default:
1543 return false;
1544 }
1545 }
1546
1547 static inline bool classof(const VPUser *R) {
1549 }
1550
1551 VPInstruction *clone() override {
1552 auto *New =
1554 *this, *this, getDebugLoc(), getName());
1555 New->setUnderlyingValue(getUnderlyingValue());
1556 return New;
1557 }
1558
1559 void execute(VPTransformState &State) override;
1560
1561 /// Return the cost of this VPInstruction.
1563 VPCostContext &Ctx) const override {
1564 // TODO: Compute accurate cost after retiring the legacy cost model.
1565 return 0;
1566 }
1567
1568 Type *getResultType() const { return ResultTy; }
1569
1570protected:
1571#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1572 /// Print the recipe.
1573 void printRecipe(raw_ostream &O, const Twine &Indent,
1574 VPSlotTracker &SlotTracker) const override;
1575#endif
1576};
1577
1578/// Helper type to provide functions to access incoming values and blocks for
1579/// phi-like recipes.
1581protected:
1582 /// Return a VPRecipeBase* to the current object.
1583 virtual const VPRecipeBase *getAsRecipe() const = 0;
1584
1585public:
1586 virtual ~VPPhiAccessors() = default;
1587
1588 /// Returns the incoming VPValue with index \p Idx.
1589 VPValue *getIncomingValue(unsigned Idx) const {
1590 return getAsRecipe()->getOperand(Idx);
1591 }
1592
1593 /// Returns the incoming block with index \p Idx.
1594 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1595
1596 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1597 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1598
1599 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1600 /// block.
1601 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1602
1603 /// Returns the number of incoming values, also number of incoming blocks.
1604 virtual unsigned getNumIncoming() const {
1605 return getAsRecipe()->getNumOperands();
1606 }
1607
1608 /// Returns an interator range over the incoming values.
1610 return make_range(getAsRecipe()->op_begin(),
1611 getAsRecipe()->op_begin() + getNumIncoming());
1612 }
1613
1615 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1616
1617 /// Returns an iterator range over the incoming blocks.
1619 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1620 return getIncomingBlock(Idx);
1621 };
1622 return map_range(index_range(0, getNumIncoming()), GetBlock);
1623 }
1624
1625 /// Returns an iterator range over pairs of incoming values and corresponding
1626 /// incoming blocks.
1632
1633 /// Removes the incoming value for \p IncomingBlock, which must be a
1634 /// predecessor.
1635 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1636
1637#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1638 /// Print the recipe.
1640#endif
1641};
1642
1645 const Twine &Name = "")
1646 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1647
1648 static inline bool classof(const VPUser *U) {
1649 auto *VPI = dyn_cast<VPInstruction>(U);
1650 return VPI && VPI->getOpcode() == Instruction::PHI;
1651 }
1652
1653 static inline bool classof(const VPValue *V) {
1654 auto *VPI = dyn_cast<VPInstruction>(V);
1655 return VPI && VPI->getOpcode() == Instruction::PHI;
1656 }
1657
1658 static inline bool classof(const VPSingleDefRecipe *SDR) {
1659 auto *VPI = dyn_cast<VPInstruction>(SDR);
1660 return VPI && VPI->getOpcode() == Instruction::PHI;
1661 }
1662
1663 VPPhi *clone() override {
1664 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1665 PhiR->setUnderlyingValue(getUnderlyingValue());
1666 return PhiR;
1667 }
1668
1669 void execute(VPTransformState &State) override;
1670
1671protected:
1672#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1673 /// Print the recipe.
1674 void printRecipe(raw_ostream &O, const Twine &Indent,
1675 VPSlotTracker &SlotTracker) const override;
1676#endif
1677
1678 const VPRecipeBase *getAsRecipe() const override { return this; }
1679};
1680
1681/// A recipe to wrap on original IR instruction not to be modified during
1682/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1683/// Expect PHIs, VPIRInstructions cannot have any operands.
1685 Instruction &I;
1686
1687protected:
1688 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1689 /// subclasses may need to be created, e.g. VPIRPhi.
1691 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1692
1693public:
1694 ~VPIRInstruction() override = default;
1695
1696 /// Create a new VPIRPhi for \p \I, if it is a PHINode, otherwise create a
1697 /// VPIRInstruction.
1699
1700 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1701
1703 auto *R = create(I);
1704 for (auto *Op : operands())
1705 R->addOperand(Op);
1706 return R;
1707 }
1708
1709 void execute(VPTransformState &State) override;
1710
1711 /// Return the cost of this VPIRInstruction.
1713 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1714
1715 Instruction &getInstruction() const { return I; }
1716
1717 bool usesScalars(const VPValue *Op) const override {
1719 "Op must be an operand of the recipe");
1720 return true;
1721 }
1722
1723 bool usesFirstPartOnly(const VPValue *Op) const override {
1725 "Op must be an operand of the recipe");
1726 return true;
1727 }
1728
1729 bool usesFirstLaneOnly(const VPValue *Op) const override {
1731 "Op must be an operand of the recipe");
1732 return true;
1733 }
1734
1735protected:
1736#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1737 /// Print the recipe.
1738 void printRecipe(raw_ostream &O, const Twine &Indent,
1739 VPSlotTracker &SlotTracker) const override;
1740#endif
1741};
1742
1743/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1744/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1745/// allowed, and it is used to add a new incoming value for the single
1746/// predecessor VPBB.
1748 public VPPhiAccessors {
1750
1751 static inline bool classof(const VPRecipeBase *U) {
1752 auto *R = dyn_cast<VPIRInstruction>(U);
1753 return R && isa<PHINode>(R->getInstruction());
1754 }
1755
1756 static inline bool classof(const VPUser *U) {
1757 auto *R = dyn_cast<VPRecipeBase>(U);
1758 return R && classof(R);
1759 }
1760
1762
1763 void execute(VPTransformState &State) override;
1764
1765protected:
1766#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1767 /// Print the recipe.
1768 void printRecipe(raw_ostream &O, const Twine &Indent,
1769 VPSlotTracker &SlotTracker) const override;
1770#endif
1771
1772 const VPRecipeBase *getAsRecipe() const override { return this; }
1773};
1774
1775/// VPWidenRecipe is a recipe for producing a widened instruction using the
1776/// opcode and operands of the recipe. This recipe covers most of the
1777/// traditional vectorization cases where each recipe transforms into a
1778/// vectorized version of itself.
1780 public VPIRMetadata {
1781 unsigned Opcode;
1782
1783public:
1785 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1786 DebugLoc DL = {})
1787 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1788 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1789 setUnderlyingValue(&I);
1790 }
1791
  /// Construct a widen recipe directly from \p Opcode and \p Operands,
  /// without an underlying IR instruction.
  VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
                const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
                DebugLoc DL = {})
      : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
        VPIRMetadata(Metadata), Opcode(Opcode) {}
1797
1798 ~VPWidenRecipe() override = default;
1799
1800 VPWidenRecipe *clone() override {
1801 if (auto *UV = getUnderlyingValue())
1802 return new VPWidenRecipe(*cast<Instruction>(UV), operands(), *this, *this,
1803 getDebugLoc());
1804 return new VPWidenRecipe(Opcode, operands(), *this, *this, getDebugLoc());
1805 }
1806
1807 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1808
1809 /// Produce a widened instruction using the opcode and operands of the recipe,
1810 /// processing State.VF elements.
1811 void execute(VPTransformState &State) override;
1812
1813 /// Return the cost of this VPWidenRecipe.
1814 InstructionCost computeCost(ElementCount VF,
1815 VPCostContext &Ctx) const override;
1816
1817 unsigned getOpcode() const { return Opcode; }
1818
1819protected:
1820#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1821 /// Print the recipe.
1822 void printRecipe(raw_ostream &O, const Twine &Indent,
1823 VPSlotTracker &SlotTracker) const override;
1824#endif
1825
1826 /// Returns true if the recipe only uses the first lane of operand \p Op.
1827 bool usesFirstLaneOnly(const VPValue *Op) const override {
1829 "Op must be an operand of the recipe");
1830 return Opcode == Instruction::Select && Op == getOperand(0) &&
1831 Op->isDefinedOutsideLoopRegions();
1832 }
1833};
1834
1835/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1837 /// Cast instruction opcode.
1838 Instruction::CastOps Opcode;
1839
1840 /// Result type for the cast.
1841 Type *ResultTy;
1842
1843public:
1845 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1846 const VPIRMetadata &Metadata = {},
1848 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, Flags, DL),
1849 VPIRMetadata(Metadata), Opcode(Opcode), ResultTy(ResultTy) {
1850 assert(flagsValidForOpcode(Opcode) &&
1851 "Set flags not supported for the provided opcode");
1853 "Opcode requires specific flags to be set");
1855 }
1856
1857 ~VPWidenCastRecipe() override = default;
1858
1860 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1862 *this, *this, getDebugLoc());
1863 }
1864
1865 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1866
1867 /// Produce widened copies of the cast.
1868 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1869
1870 /// Return the cost of this VPWidenCastRecipe.
1872 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1873
1874 Instruction::CastOps getOpcode() const { return Opcode; }
1875
1876 /// Returns the result type of the cast.
1877 Type *getResultType() const { return ResultTy; }
1878
1879protected:
1880#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1881 /// Print the recipe.
1882 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1883 VPSlotTracker &SlotTracker) const override;
1884#endif
1885};
1886
1887/// A recipe for widening vector intrinsics.
1889 /// ID of the vector intrinsic to widen.
1890 Intrinsic::ID VectorIntrinsicID;
1891
1892 /// Scalar return type of the intrinsic.
1893 Type *ResultTy;
1894
1895 /// True if the intrinsic may read from memory.
1896 bool MayReadFromMemory;
1897
1898 /// True if the intrinsic may write to memory.
1899 bool MayWriteToMemory;
1900
1901 /// True if the intrinsic may have side-effects.
1902 bool MayHaveSideEffects;
1903
1904public:
1906 ArrayRef<VPValue *> CallArguments, Type *Ty,
1907 const VPIRFlags &Flags = {},
1908 const VPIRMetadata &MD = {},
1910 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1911 Flags, DL),
1912 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1913 MayReadFromMemory(CI.mayReadFromMemory()),
1914 MayWriteToMemory(CI.mayWriteToMemory()),
1915 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1916 setUnderlyingValue(&CI);
1917 }
1918
1920 ArrayRef<VPValue *> CallArguments, Type *Ty,
1921 const VPIRFlags &Flags = {},
1922 const VPIRMetadata &Metadata = {},
1924 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments,
1925 Flags, DL),
1926 VPIRMetadata(Metadata), VectorIntrinsicID(VectorIntrinsicID),
1927 ResultTy(Ty) {
 // No underlying CallInst is available here, so conservatively derive the
 // memory-effect flags from the intrinsic's function attributes.
1928 LLVMContext &Ctx = Ty->getContext();
1929 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1930 MemoryEffects ME = Attrs.getMemoryEffects();
1931 MayReadFromMemory = !ME.onlyWritesMemory();
1932 MayWriteToMemory = !ME.onlyReadsMemory();
1933 MayHaveSideEffects = MayWriteToMemory ||
1934 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1935 !Attrs.hasAttribute(Attribute::WillReturn);
1936 }
1937
1938 ~VPWidenIntrinsicRecipe() override = default;
1940
1941 if (Value *CI = getUnderlyingValue())
1942 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1943 operands(), ResultTy, *this, *this,
1944 getDebugLoc());
1945 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(), ResultTy,
1946 *this, *this, getDebugLoc());
1947 }
1948
1949 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntrinsicSC)
1950
1951 /// Produce a widened version of the vector intrinsic.
1952 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1953
1954 /// Return the cost of this vector intrinsic.
1956 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1957
1958 /// Return the ID of the intrinsic.
1959 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1960
1961 /// Return the scalar return type of the intrinsic.
1962 Type *getResultType() const { return ResultTy; }
1964
1966
1967 /// Returns true if the intrinsic may read from memory.
1968 bool mayReadFromMemory() const { return MayReadFromMemory; }
1969
1970 /// Returns true if the intrinsic may write to memory.
1971 bool mayWriteToMemory() const { return MayWriteToMemory; }
1972
1973 /// Returns true if the intrinsic may have side-effects.
1974 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1975
1976 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
1977
1978protected:
1979#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1980 /// Print the recipe.
1981 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1982 VPSlotTracker &SlotTracker) const override;
1983#endif
1984};
1985
1986/// A recipe for widening Call instructions using library calls.
/// The last operand is the called scalar function; \p Variant is the vector
/// library function that is invoked instead when widening.
1988 public VPIRMetadata {
1989 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1990 /// between a given VF and the chosen vectorized variant, so there will be a
1991 /// different VPlan for each VF with a valid variant.
1992 Function *Variant;
1993
1994public:
1996 ArrayRef<VPValue *> CallArguments,
1997 const VPIRFlags &Flags = {},
1998 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
1999 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments, Flags,
2000 DL),
2001 VPIRMetadata(Metadata), Variant(Variant) {
2002 setUnderlyingValue(UV);
2003 assert(
2004 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2005 "last operand must be the called function");
2006 }
2007
2008 ~VPWidenCallRecipe() override = default;
2010
2011 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2012 *this, *this, getDebugLoc());
2013 }
2014
2015 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2016
2017 /// Produce a widened version of the call instruction.
2018 void execute(VPTransformState &State) override;
2019
2020 /// Return the cost of this VPWidenCallRecipe.
2021 InstructionCost computeCost(ElementCount VF,
2022 VPCostContext &Ctx) const override;
2023
2027
2030
2031protected:
2032#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2033 /// Print the recipe.
2034 void printRecipe(raw_ostream &O, const Twine &Indent,
2035 VPSlotTracker &SlotTracker) const override;
2036#endif
2037};
2038
2039/// A recipe representing a sequence of load -> update -> store as part of
2040/// a histogram operation. This means there may be aliasing between vector
2041/// lanes, which is handled by the llvm.experimental.vector.histogram family
2042/// of intrinsics. The only update operations currently supported are
2043/// 'add' and 'sub' where the other term is loop-invariant.
2046 /// Opcode of the update operation, currently either add or sub.
2047 unsigned Opcode;
2048
2049public:
2050 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2052 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2053 Opcode(Opcode) {}
2054
2055 ~VPHistogramRecipe() override = default;
2057
2058 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2059 }
2060
2061 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2062
2063 /// Produce a vectorized histogram operation.
2064 void execute(VPTransformState &State) override;
2065
2066 /// Return the cost of this VPHistogramRecipe.
2068 VPCostContext &Ctx) const override;
2069
 /// Returns the opcode of the update operation (add or sub).
2070 unsigned getOpcode() const { return Opcode; }
2071
2072 /// Return the mask operand if one was provided, or a null pointer if all
2073 /// lanes should be executed unconditionally.
2074 VPValue *getMask() const {
2075 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2076 }
2077
2078protected:
2079#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2080 /// Print the recipe.
2081 void printRecipe(raw_ostream &O, const Twine &Indent,
2082 VPSlotTracker &SlotTracker) const override;
2083#endif
2084};
2084
2085/// A recipe for handling GEP instructions.
2088 Type *SourceElementTy;
2089
 /// Returns true if the GEP's pointer operand (operand 0) is defined outside
 /// the loop regions.
2090 bool isPointerLoopInvariant() const {
2091 return getOperand(0)->isDefinedOutsideLoopRegions();
2092 }
2093
 /// Returns true if index operand \p I is defined outside the loop regions.
2094 bool isIndexLoopInvariant(unsigned I) const {
2095 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2096 }
2097
2098public:
2100 const VPIRFlags &Flags = {},
2102 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands, Flags, DL),
2103 SourceElementTy(GEP->getSourceElementType()) {
2104 setUnderlyingValue(GEP);
2106 (void)Metadata;
2108 assert(Metadata.empty() && "unexpected metadata on GEP");
2109 }
2110
2111 ~VPWidenGEPRecipe() override = default;
2113
2115 operands(), *this, getDebugLoc());
2116 }
2117
2118 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2119
 /// Returns the opcode (Instruction::GetElementPtr) of the instruction this
 /// recipe widens.
2121 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2122
2123 /// Generate the gep nodes.
2124 void execute(VPTransformState &State) override;
2125
2126 Type *getSourceElementType() const { return SourceElementTy; }
2127
2128 /// Return the cost of this VPWidenGEPRecipe.
2130 VPCostContext &Ctx) const override {
2131 // TODO: Compute accurate cost after retiring the legacy cost model.
2132 return 0;
2133 }
2134
2135 /// Returns true if the recipe only uses the first lane of operand \p Op.
2136 bool usesFirstLaneOnly(const VPValue *Op) const override;
2137
2138protected:
2139#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2140 /// Print the recipe.
2141 void printRecipe(raw_ostream &O, const Twine &Indent,
2142 VPSlotTracker &SlotTracker) const override;
2143#endif
2144};
2144
2145/// A recipe to compute a pointer to the last element of each part of a widened
2146/// memory access for widened memory accesses of SourceElementTy. Used for
2147/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2148/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2149/// unroller otherwise.
2151 Type *SourceElementTy;
2152
2153 /// The constant stride of the pointer computed by this recipe, expressed in
2154 /// units of SourceElementTy.
2155 int64_t Stride;
2156
2157public:
2158 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2159 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2160 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2161 GEPFlags, DL),
2162 SourceElementTy(SourceElementTy), Stride(Stride) {
2163 assert(Stride < 0 && "Stride must be negative");
2164 }
2165
2166 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2167
2168 Type *getSourceElementType() const { return SourceElementTy; }
2169 int64_t getStride() const { return Stride; }
2170 VPValue *getPointer() const { return getOperand(0); }
2171 VPValue *getVFValue() const { return getOperand(1); }
 /// Returns the offset operand if materializeOffset added one, or nullptr
 /// otherwise.
2173 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2174 }
2175
2176 /// Adds the offset operand to the recipe.
2177 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2178 void materializeOffset(unsigned Part = 0);
2179
2180 void execute(VPTransformState &State) override;
2181
2182 bool usesFirstLaneOnly(const VPValue *Op) const override {
2184 "Op must be an operand of the recipe");
2185 return true;
2186 }
2187
2188 /// Return the cost of this VPVectorEndPointerRecipe.
2190 VPCostContext &Ctx) const override {
2191 // TODO: Compute accurate cost after retiring the legacy cost model.
2192 return 0;
2193 }
2194
2195 /// Returns true if the recipe only uses the first part of operand \p Op.
2196 bool usesFirstPartOnly(const VPValue *Op) const override {
2198 "Op must be an operand of the recipe");
2199 assert(getNumOperands() <= 2 && "must have at most two operands");
2200 return true;
2201 }
2202
2204 auto *VEPR = new VPVectorEndPointerRecipe(
2207 if (auto *Offset = getOffset())
2208 VEPR->addOperand(Offset);
2209 return VEPR;
2210 }
2211
2212protected:
2213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2214 /// Print the recipe.
2215 void printRecipe(raw_ostream &O, const Twine &Indent,
2216 VPSlotTracker &SlotTracker) const override;
2217#endif
2218};
2219
2220/// A recipe to compute the pointers for widened memory accesses of \p
2221/// SourceElementTy. Unrolling adds an extra offset operand for unrolled parts >
2222/// 0 and it produces `GEP Ptr, Offset`. The offset for unrolled part 0 is 0.
2224 Type *SourceElementTy;
2225
2226public:
2227 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy,
2228 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2229 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC, Ptr, GEPFlags, DL),
2230 SourceElementTy(SourceElementTy) {}
2231
2232 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2233
 /// Returns the offset operand added by unrolling, or nullptr if not present.
2235 return getNumOperands() == 2 ? getOperand(1) : nullptr;
2236 }
2237
2238 void execute(VPTransformState &State) override;
2239
2240 Type *getSourceElementType() const { return SourceElementTy; }
2241
2242 bool usesFirstLaneOnly(const VPValue *Op) const override {
2244 "Op must be an operand of the recipe");
2245 return true;
2246 }
2247
2248 /// Returns true if the recipe only uses the first part of operand \p Op.
2249 bool usesFirstPartOnly(const VPValue *Op) const override {
2251 "Op must be an operand of the recipe");
2252 assert(getNumOperands() <= 2 && "must have at most two operands");
2253 return true;
2254 }
2255
2257 auto *Clone = new VPVectorPointerRecipe(getOperand(0), SourceElementTy,
2259 if (auto *Off = getOffset())
2260 Clone->addOperand(Off);
2261 return Clone;
2262 }
2263
2264 /// Return the cost of this VPVectorPointerRecipe.
2266 VPCostContext &Ctx) const override {
2267 // TODO: Compute accurate cost after retiring the legacy cost model.
2268 return 0;
2269 }
2270
2271protected:
2272#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2273 /// Print the recipe.
2274 void printRecipe(raw_ostream &O, const Twine &Indent,
2275 VPSlotTracker &SlotTracker) const override;
2276#endif
2277};
2278
2279/// A pure virtual base class for all recipes modeling header phis, including
2280/// phis for first order recurrences, pointer inductions and reductions. The
2281/// start value is the first operand of the recipe and the incoming value from
2282/// the backedge is the second operand.
2283///
2284/// Inductions are modeled using the following sub-classes:
2285/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2286/// floating point inductions with arbitrary start and step values. Produces
2287/// a vector PHI per-part.
2288/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2289/// pointer induction. Produces either a vector PHI per-part or scalar values
2290/// per-lane based on the canonical induction.
2291/// * VPFirstOrderRecurrencePHIRecipe
2292/// * VPReductionPHIRecipe
2293/// * VPActiveLaneMaskPHIRecipe
2294/// * VPEVLBasedIVPHIRecipe
2295///
2296/// Note that the canonical IV is modeled as a VPRegionValue associated with
2297/// its loop region.
2299 public VPPhiAccessors {
2300protected:
2301 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2302 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2303 : VPSingleDefRecipe(VPRecipeID, Start, UnderlyingInstr, DL) {}
2304
 /// Implements the VPPhiAccessors interface by exposing this recipe.
2305 const VPRecipeBase *getAsRecipe() const override { return this; }
2306
2307public:
2308 ~VPHeaderPHIRecipe() override = default;
2309
2310 /// Method to support type inquiry through isa, cast, and dyn_cast.
2311 static inline bool classof(const VPRecipeBase *R) {
2312 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2313 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2314 }
2315 static inline bool classof(const VPValue *V) {
2316 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2317 }
2318 static inline bool classof(const VPSingleDefRecipe *R) {
2319 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2320 }
2321
2322 /// Generate the phi nodes.
2323 void execute(VPTransformState &State) override = 0;
2324
2325 /// Return the cost of this header phi recipe.
2327 VPCostContext &Ctx) const override;
2328
2329 /// Returns the start value of the phi, if one is set.
2331 return getNumOperands() == 0 ? nullptr : getOperand(0);
2332 }
2334 return getNumOperands() == 0 ? nullptr : getOperand(0);
2335 }
2336
2337 /// Update the start value of the recipe.
2339
2340 /// Returns the incoming value from the loop backedge.
2342 return getOperand(1);
2343 }
2344
2345 /// Update the incoming value from the loop backedge.
2347
2348 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2349 /// to be a recipe.
2351 return *getBackedgeValue()->getDefiningRecipe();
2352 }
2353
2354protected:
2355#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2356 /// Print the recipe.
2357 void printRecipe(raw_ostream &O, const Twine &Indent,
2358 VPSlotTracker &SlotTracker) const override = 0;
2359#endif
2360};
2361
2362/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2363/// VPWidenPointerInductionRecipe), providing shared functionality, including
2364/// retrieving the step value, induction descriptor and original phi node.
2366 InductionDescriptor IndDesc;
2367
2368public:
2369 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2370 VPValue *Step, const InductionDescriptor &IndDesc,
2371 DebugLoc DL)
2372 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2373 addOperand(Step);
2374 }
2375
2376 static inline bool classof(const VPRecipeBase *R) {
2377 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2378 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2379 }
2380
2381 static inline bool classof(const VPValue *V) {
2382 auto *R = V->getDefiningRecipe();
2383 return R && classof(R);
2384 }
2385
2386 static inline bool classof(const VPSingleDefRecipe *R) {
2387 return classof(static_cast<const VPRecipeBase *>(R));
2388 }
2389
2390 void execute(VPTransformState &State) override = 0;
2391
2392 /// Returns the start value of the induction.
2394
2395 /// Returns the step value of the induction.
2397 const VPValue *getStepValue() const { return getOperand(1); }
2398
2399 /// Update the step value of the recipe.
2400 void setStepValue(VPValue *V) { setOperand(1, V); }
2401
 /// Returns the VF operand (operand 2).
2403 const VPValue *getVFValue() const { return getOperand(2); }
2404
2405 /// Returns the number of incoming values, also number of incoming blocks.
2406 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2407 /// incoming value, its start value.
2408 unsigned getNumIncoming() const override { return 1; }
2409
2410 /// Returns the underlying PHINode if one exists, or null otherwise.
2414
2415 /// Returns the induction descriptor for the recipe.
2416 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2417
2419 // TODO: All operands of base recipe must exist and be at same index in
2420 // derived recipe.
2422 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2423 }
2424
2426 // TODO: All operands of base recipe must exist and be at same index in
2427 // derived recipe.
2429 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2430 }
2431
2432 /// Returns true if the recipe only uses the first lane of operand \p Op.
2433 bool usesFirstLaneOnly(const VPValue *Op) const override {
2435 "Op must be an operand of the recipe");
2436 // The recipe creates its own wide start value, so it only requests the
2437 // first lane of the operand.
2438 // TODO: Remove once creating the start value is modeled separately.
2439 return Op == getStartValue() || Op == getStepValue();
2440 }
2441};
2442
2443/// A recipe for handling phi nodes of integer and floating-point inductions,
2444/// producing their vector values. This is an abstract recipe and must be
2445/// converted to concrete recipes before executing.
2447 public VPIRFlags {
 /// Optional truncate folded into the induction; nullptr if none.
2448 TruncInst *Trunc;
2449
2450 // If this recipe is unrolled it will have 2 additional operands.
2451 bool isUnrolled() const { return getNumOperands() == 5; }
2452
2453public:
2455 VPValue *VF, const InductionDescriptor &IndDesc,
2456 const VPIRFlags &Flags, DebugLoc DL)
2457 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2458 Start, Step, IndDesc, DL),
2459 VPIRFlags(Flags), Trunc(nullptr) {
2460 addOperand(VF);
2461 }
2462
2464 VPValue *VF, const InductionDescriptor &IndDesc,
2465 TruncInst *Trunc, const VPIRFlags &Flags,
2466 DebugLoc DL)
2467 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2468 Start, Step, IndDesc, DL),
2469 VPIRFlags(Flags), Trunc(Trunc) {
2470 addOperand(VF);
2472 (void)Metadata;
2473 if (Trunc)
2475 assert(Metadata.empty() && "unexpected metadata on Trunc");
2476 }
2477
2479
2485
2486 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2487
2488 void execute(VPTransformState &State) override {
2489 llvm_unreachable("cannot execute this recipe, should be expanded via "
2490 "expandVPWidenIntOrFpInductionRecipe");
2491 }
2492
2493 /// Returns the start value of the induction.
2495
2496 /// If the recipe has been unrolled, return the VPValue for the induction
2497 /// increment, otherwise return null.
2499 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2500 }
2501
2502 /// Returns the number of incoming values, also number of incoming blocks.
2503 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2504 /// incoming value, its start value.
2505 unsigned getNumIncoming() const override { return 1; }
2506
2507 /// Returns the first defined value as TruncInst, if it is one or nullptr
2508 /// otherwise.
2509 TruncInst *getTruncInst() { return Trunc; }
2510 const TruncInst *getTruncInst() const { return Trunc; }
2511
2512 /// Returns true if the induction is canonical, i.e. starting at 0 and
2513 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2514 /// same type as the canonical induction.
2515 bool isCanonical() const;
2516
2517 /// Returns the scalar type of the induction.
2519 return Trunc ? Trunc->getType() : getStartValue()->getType();
2520 }
2521
2522 /// Returns the VPValue representing the value of this induction at
2523 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2524 /// take place.
2526 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2527 }
2528
2529protected:
2530#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2531 /// Print the recipe.
2532 void printRecipe(raw_ostream &O, const Twine &Indent,
2533 VPSlotTracker &SlotTracker) const override;
2534#endif
2535};
2536
2538public:
2539 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2540 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2541 /// VF*UF.
 /// \p Step is the step of the pointer induction.
2543 VPValue *NumUnrolledElems,
2544 const InductionDescriptor &IndDesc, DebugLoc DL)
2545 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2546 Start, Step, IndDesc, DL) {
2547 addOperand(NumUnrolledElems);
2548 }
2549
2551
2557
2558 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2559
2560 /// Generate vector values for the pointer induction.
2561 void execute(VPTransformState &State) override {
2562 llvm_unreachable("cannot execute this recipe, should be expanded via "
2563 "expandVPWidenPointerInduction");
2564 };
2565
2566 /// Returns true if only scalar values will be generated.
2567 bool onlyScalarsGenerated(bool IsScalable);
2568
2569protected:
2570#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2571 /// Print the recipe.
2572 void printRecipe(raw_ostream &O, const Twine &Indent,
2573 VPSlotTracker &SlotTracker) const override;
2574#endif
2575};
2576
2577/// A recipe for widened phis. Incoming values are operands of the recipe and
2578/// their operand index corresponds to the incoming predecessor block. If the
2579/// recipe is placed in an entry block to a (non-replicate) region, it must have
2580/// exactly 2 incoming values, the first from the predecessor of the region and
2581/// the second from the exiting block of the region.
2583 public VPPhiAccessors {
2584 /// Name to use for the generated IR instruction for the widened phi.
2585 std::string Name;
2586
2587public:
2588 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2589 /// debug location \p DL and \p Name.
2591 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2592 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues, DL),
2593 Name(Name.str()) {}
2594
2596 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2597 }
2598
2599 ~VPWidenPHIRecipe() override = default;
2600
2601 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2602
2603 /// Generate the phi/select nodes.
2604 void execute(VPTransformState &State) override;
2605
2606 /// Return the cost of this VPWidenPHIRecipe.
2608 VPCostContext &Ctx) const override;
2609
2610protected:
2611#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2612 /// Print the recipe.
2613 void printRecipe(raw_ostream &O, const Twine &Indent,
2614 VPSlotTracker &SlotTracker) const override;
2615#endif
2616
 /// Implements the VPPhiAccessors interface by exposing this recipe.
2617 const VPRecipeBase *getAsRecipe() const override { return this; }
2618};
2619
2620/// A recipe for handling first-order recurrence phis. The start value is the
2621/// first operand of the recipe and the incoming value from the backedge is the
2622/// second operand.
2625 VPValue &BackedgeValue)
2626 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2627 &Start) {
2628 addOperand(&BackedgeValue);
2629 }
2630
2631 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2632
2637
 /// Generate the phi node for the first-order recurrence.
2638 void execute(VPTransformState &State) override;
2639
2640 /// Return the cost of this first-order recurrence phi recipe.
2642 VPCostContext &Ctx) const override;
2643
2644 /// Returns true if the recipe only uses the first lane of operand \p Op.
2645 bool usesFirstLaneOnly(const VPValue *Op) const override {
2647 "Op must be an operand of the recipe");
2648 return Op == getStartValue();
2649 }
2650
2651protected:
2652#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2653 /// Print the recipe.
2654 void printRecipe(raw_ostream &O, const Twine &Indent,
2655 VPSlotTracker &SlotTracker) const override;
2656#endif
2657};
2658
2659/// Possible variants of a reduction.
2660
2661/// This reduction is ordered and in-loop.
2662struct RdxOrdered {};
2663/// This reduction is in-loop.
2664struct RdxInLoop {};
2665/// This reduction is unordered with the partial result scaled down by some
2666/// factor.
2669};
/// Discriminated union over the possible reduction variants; see
/// getReductionStyle() for constructing one from (InLoop, Ordered, Scale).
2670using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2671
2672inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2673 unsigned ScaleFactor) {
2674 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2675 if (Ordered)
2676 return RdxOrdered{};
2677 if (InLoop)
2678 return RdxInLoop{};
2679 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2680}
2681
2682/// A recipe for handling reduction phis. The start value is the first operand
2683/// of the recipe and the incoming value from the backedge is the second
2684/// operand.
2686 /// The recurrence kind of the reduction.
2687 const RecurKind Kind;
2688
 /// How the reduction is performed (ordered, in-loop, or unordered/partial).
2689 ReductionStyle Style;
2690
2691 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2692 /// patterns for argmin/argmax).
2693 /// TODO: Also support cases where the phi itself has a single use, but its
2694 /// compare has multiple uses.
2695 bool HasUsesOutsideReductionChain;
2696
2697public:
2698 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2700 VPValue &BackedgeValue, ReductionStyle Style,
2701 const VPIRFlags &Flags,
2702 bool HasUsesOutsideReductionChain = false)
2703 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2704 VPIRFlags(Flags), Kind(Kind), Style(Style),
2705 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2706 addOperand(&BackedgeValue);
2707 }
2708
2709 ~VPReductionPHIRecipe() override = default;
2710
2712 return new VPReductionPHIRecipe(
2714 *getOperand(0), *getBackedgeValue(), Style, *this,
2715 HasUsesOutsideReductionChain);
2716 }
2717
2718 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2719
2720 /// Generate the phi/select nodes.
2721 void execute(VPTransformState &State) override;
2722
2723 /// Get the factor that the VF of this recipe's output should be scaled by, or
2724 /// 1 if it isn't scaled.
2725 unsigned getVFScaleFactor() const {
2726 auto *Partial = std::get_if<RdxUnordered>(&Style);
2727 return Partial ? Partial->VFScaleFactor : 1;
2728 }
2729
2730 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2731 /// > 1.
2732 void setVFScaleFactor(unsigned ScaleFactor) {
2733 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2734 Style = RdxUnordered{ScaleFactor};
2735 }
2736
2737 /// Returns the number of incoming values, also number of incoming blocks.
2738 /// A reduction phi always has exactly two incoming values: the start value
2739 /// and the value coming in from the loop backedge.
2740 unsigned getNumIncoming() const override { return 2; }
2741
2742 /// Returns the recurrence kind of the reduction.
2743 RecurKind getRecurrenceKind() const { return Kind; }
2744
2745 /// Returns true, if the phi is part of an ordered reduction.
2746 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2747
2748 /// Returns true if the phi is part of an in-loop reduction.
2749 bool isInLoop() const {
2750 return std::holds_alternative<RdxInLoop>(Style) ||
2751 std::holds_alternative<RdxOrdered>(Style);
2752 }
2753
2754 /// Returns true if the reduction outputs a vector with a scaled down VF.
2755 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2756
2757 /// Returns true, if the phi is part of a multi-use reduction.
2759 return HasUsesOutsideReductionChain;
2760 }
2761
2762 /// Returns true if the recipe only uses the first lane of operand \p Op.
2763 bool usesFirstLaneOnly(const VPValue *Op) const override {
2765 "Op must be an operand of the recipe");
2766 return isOrdered() || isInLoop();
2767 }
2768
2769protected:
2770#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2771 /// Print the recipe.
2772 void printRecipe(raw_ostream &O, const Twine &Indent,
2773 VPSlotTracker &SlotTracker) const override;
2774#endif
2775};
2776
2777/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2778/// instructions.
2780public:
2781 /// The blend operation is a User of the incoming values and of their
2782 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2783 /// be omitted (implied by passing an odd number of operands) in which case
2784 /// all other incoming values are merged into it.
2786 const VPIRFlags &Flags, DebugLoc DL)
2787 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2788 assert(Operands.size() >= 2 && "Expected at least two operands!");
2789 setUnderlyingValue(Phi);
2790 }
2791
2792 VPBlendRecipe *clone() override {
2794 operands(), *this, getDebugLoc());
2795 }
2796
2797 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2798
2799 /// A normalized blend is one that has an odd number of operands, whereby the
2800 /// first operand does not have an associated mask.
2801 bool isNormalized() const { return getNumOperands() % 2; }
2802
2803 /// Return the number of incoming values, taking into account when normalized
2804 /// the first incoming value will have no mask.
2805 unsigned getNumIncomingValues() const {
2806 return (getNumOperands() + isNormalized()) / 2;
2807 }
2808
2809 /// Return incoming value number \p Idx.
2810 VPValue *getIncomingValue(unsigned Idx) const {
2811 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2812 }
2813
2814 /// Return mask number \p Idx.
2815 VPValue *getMask(unsigned Idx) const {
2816 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2817 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2818 }
2819
2820 /// Set mask number \p Idx to \p V.
2821 void setMask(unsigned Idx, VPValue *V) {
2822 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2823 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2824 }
2825
2826 void execute(VPTransformState &State) override {
2827 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2828 }
2829
2830 /// Return the cost of this VPBlendRecipe.
2831 InstructionCost computeCost(ElementCount VF,
2832 VPCostContext &Ctx) const override;
2833
2834 /// Returns true if the recipe only uses the first lane of operand \p Op.
2835 bool usesFirstLaneOnly(const VPValue *Op) const override;
2836
2837protected:
2838#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2839 /// Print the recipe.
2840 void printRecipe(raw_ostream &O, const Twine &Indent,
2841 VPSlotTracker &SlotTracker) const override;
2842#endif
2843};
2844
2845/// A common base class for interleaved memory operations.
2846/// An Interleaved memory operation is a memory access method that combines
2847/// multiple strided loads/stores into a single wide load/store with shuffles.
2848/// The first operand is the start address. The optional operands are, in order,
2849/// the stored values and the mask.
2851 public VPIRMetadata {
2853
2854 /// Indicates if the interleave group is in a conditional block and requires a
2855 /// mask.
2856 bool HasMask = false;
2857
2858 /// Indicates if gaps between members of the group need to be masked out or if
2859 /// unused gaps can be loaded speculatively.
2860 bool NeedsMaskForGaps = false;
2861
2862protected:
2863 VPInterleaveBase(const unsigned char SC,
2865 ArrayRef<VPValue *> Operands,
2866 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2867 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2868 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2869 NeedsMaskForGaps(NeedsMaskForGaps) {
2870 // TODO: extend the masked interleaved-group support to reversed access.
2871 assert((!Mask || !IG->isReverse()) &&
2872 "Reversed masked interleave-group not supported.");
 // Load groups define one VPValue per member; store groups instead take
 // the stored values as additional operands.
2873 if (StoredValues.empty()) {
2874 for (Instruction *Inst : IG->members()) {
2875 assert(!Inst->getType()->isVoidTy() && "must have result");
2876 new VPRecipeValue(this, Inst);
2877 }
2878 } else {
2879 for (auto *SV : StoredValues)
2880 addOperand(SV);
2881 }
2882 if (Mask) {
2883 HasMask = true;
2884 addOperand(Mask);
2885 }
2886 }
2887
2888public:
2889 VPInterleaveBase *clone() override = 0;
2890
2891 static inline bool classof(const VPRecipeBase *R) {
2892 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
2893 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
2894 }
2895
2896 static inline bool classof(const VPUser *U) {
2897 auto *R = dyn_cast<VPRecipeBase>(U);
2898 return R && classof(R);
2899 }
2900
2901 /// Return the address accessed by this recipe.
2902 VPValue *getAddr() const {
2903 return getOperand(0); // Address is the 1st, mandatory operand.
2904 }
2905
2906 /// Return the mask used by this recipe. Note that a full mask is represented
2907 /// by a nullptr.
2908 VPValue *getMask() const {
2909 // Mask is optional and the last operand.
2910 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2911 }
2912
2913 /// Return true if the access needs a mask because of the gaps.
2914 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
2915
2917
 /// Return the insert position of the underlying interleave group.
2918 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2919
2920 void execute(VPTransformState &State) override {
2921 llvm_unreachable("VPInterleaveBase should not be instantiated.");
2922 }
2923
2924 /// Return the cost of this recipe.
2925 InstructionCost computeCost(ElementCount VF,
2926 VPCostContext &Ctx) const override;
2927
2928 /// Returns true if the recipe only uses the first lane of operand \p Op.
2929 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
2930
2931 /// Returns the number of stored operands of this interleave group. Returns 0
2932 /// for load interleave groups.
2933 virtual unsigned getNumStoreOperands() const = 0;
2934
2935 /// Return the VPValues stored by this interleave group. If it is a load
2936 /// interleave group, return an empty ArrayRef.
2938 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
2940 }
2941};
2942
2943/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2944/// or stores into one wide load/store and shuffles. The first operand of a
2945/// VPInterleave recipe is the address, followed by the stored values, followed
2946/// by an optional mask.
2948public:
  /// Construct a plain (non-EVL) interleave recipe; all arguments are
  /// forwarded to VPInterleaveBase with the VPInterleaveSC recipe ID.
2950                     ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2951                     bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2952      : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
2953                         Mask, NeedsMaskForGaps, MD, DL) {}
2954
2955  ~VPInterleaveRecipe() override = default;
2956
  /// Clone by re-running the constructor with this recipe's current state.
2960                                  needsMaskForGaps(), *this, getDebugLoc());
2961  }
2962
2963  VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
2964
2965  /// Generate the wide load or store, and shuffles.
2966  void execute(VPTransformState &State) override;
2967
  /// Only the address demands a single lane, and only when the same VPValue is
  /// not also one of the stored values (then full lanes are needed).
2968  bool usesFirstLaneOnly(const VPValue *Op) const override {
2970           "Op must be an operand of the recipe");
2971    return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2972  }
2973
  /// Total operands minus the address and, if present, the trailing mask.
2974  unsigned getNumStoreOperands() const override {
2975    return getNumOperands() - (getMask() ? 2 : 1);
2976  }
2977
2978protected:
2979#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2980  /// Print the recipe.
2981  void printRecipe(raw_ostream &O, const Twine &Indent,
2982                   VPSlotTracker &SlotTracker) const override;
2983#endif
2984};
2985
2986/// A recipe for interleaved memory operations with vector-predication
2987/// intrinsics. The first operand is the address, the second operand is the
2988/// explicit vector length. Stored values and mask are optional operands.
2990public:
  /// Build an EVL-based interleave recipe from an existing non-EVL recipe \p R,
  /// inserting \p EVL as the second operand and replacing the mask.
2992      : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
2993                         R.getInterleaveGroup(), {R.getAddr(), &EVL},
2994                         R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
2995                         R.getDebugLoc()) {
2996    assert(!getInterleaveGroup()->isReverse() &&
2997           "Reversed interleave-group with tail folding is not supported.");
2998    assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
2999                                  "supported for scalable vector.");
3000  }
3001
3002  ~VPInterleaveEVLRecipe() override = default;
3003
  /// Cloning is intentionally unimplemented for EVL recipes.
3005    llvm_unreachable("cloning not implemented yet");
3006  }
3007
3008  VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3009
3010  /// The VPValue of the explicit vector length.
3011  VPValue *getEVL() const { return getOperand(1); }
3012
3013  /// Generate the wide load or store, and shuffles.
3014  void execute(VPTransformState &State) override;
3015
3016  /// The recipe only uses the first lane of the address, and EVL operand.
3017  bool usesFirstLaneOnly(const VPValue *Op) const override {
3019           "Op must be an operand of the recipe");
3020    return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3021           Op == getEVL();
3022  }
3023
  /// Total operands minus address, EVL, and (if present) the trailing mask.
3024  unsigned getNumStoreOperands() const override {
3025    return getNumOperands() - (getMask() ? 3 : 2);
3026  }
3027
3028protected:
3029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3030  /// Print the recipe.
3031  void printRecipe(raw_ostream &O, const Twine &Indent,
3032                   VPSlotTracker &SlotTracker) const override;
3033#endif
3034};
3035
3036/// A recipe to represent inloop, ordered or partial reduction operations. It
3037/// performs a reduction on a vector operand into a scalar (vector in the case
3038/// of a partial reduction) value, and adds the result to a chain. The Operands
3039/// are {ChainOp, VecOp, [Condition]}.
3041
3042  /// The recurrence kind for the reduction in question.
3043  RecurKind RdxKind;
3044  /// Whether the reduction is conditional.
3045  bool IsConditional = false;
  /// Variant distinguishing in-loop (RdxInLoop), ordered (RdxOrdered) and
  /// unordered/partial (RdxUnordered, carrying a VF scale factor) reductions.
3046  ReductionStyle Style;
3047
3048protected:
  /// Shared constructor for this class and the EVL subclass; \p CondOp, when
  /// non-null, is appended as the last operand and marks the reduction
  /// conditional.
3049  VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3051                    ArrayRef<VPValue *> Operands, VPValue *CondOp,
3052                    ReductionStyle Style, DebugLoc DL)
3053      : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3054        Style(Style) {
3055    if (CondOp) {
3056      IsConditional = true;
3057      addOperand(CondOp);
3058    }
3060  }
3061
3062public:
3064                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3066      : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3067                          {ChainOp, VecOp}, CondOp, Style, DL) {}
3068
  /// Convenience overload without an underlying IR instruction.
3070                    VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3072      : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3073                          {ChainOp, VecOp}, CondOp, Style, DL) {}
3074
3075  ~VPReductionRecipe() override = default;
3076
3078    return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3080                                 getCondOp(), Style, getDebugLoc());
3081  }
3082
  /// Support isa/dyn_cast for both the plain and EVL reduction recipe IDs.
3083  static inline bool classof(const VPRecipeBase *R) {
3084    return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3085           R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3086  }
3087
3088  static inline bool classof(const VPUser *U) {
3089    auto *R = dyn_cast<VPRecipeBase>(U);
3090    return R && classof(R);
3091  }
3092
3093  static inline bool classof(const VPValue *VPV) {
3094    const VPRecipeBase *R = VPV->getDefiningRecipe();
3095    return R && classof(R);
3096  }
3097
3098  static inline bool classof(const VPSingleDefRecipe *R) {
3099    return classof(static_cast<const VPRecipeBase *>(R));
3100  }
3101
3102  /// Generate the reduction in the loop.
3103  void execute(VPTransformState &State) override;
3104
3105  /// Return the cost of VPReductionRecipe.
3106  InstructionCost computeCost(ElementCount VF,
3107                              VPCostContext &Ctx) const override;
3108
3109  /// Return the recurrence kind for the in-loop reduction.
3110  RecurKind getRecurrenceKind() const { return RdxKind; }
3111  /// Return true if the in-loop reduction is ordered.
3112  bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3113  /// Return true if the in-loop reduction is conditional.
3114  bool isConditional() const { return IsConditional; };
3115  /// Returns true if the reduction outputs a vector with a scaled down VF.
3116  bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3117  /// Returns true if the reduction is in-loop.
3118  bool isInLoop() const {
3119    return std::holds_alternative<RdxInLoop>(Style) ||
3120           std::holds_alternative<RdxOrdered>(Style);
3121  }
3122  /// The VPValue of the scalar Chain being accumulated.
3123  VPValue *getChainOp() const { return getOperand(0); }
3124  /// The VPValue of the vector value to be reduced.
3125  VPValue *getVecOp() const { return getOperand(1); }
3126  /// The VPValue of the condition for the block.
  // The condition, when present, is always the last operand (see constructor).
3128    return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3129  }
3130  /// Get the factor that the VF of this recipe's output should be scaled by, or
3131  /// 1 if it isn't scaled.
3132  unsigned getVFScaleFactor() const {
3133    auto *Partial = std::get_if<RdxUnordered>(&Style);
3134    return Partial ? Partial->VFScaleFactor : 1;
3135  }
3136
3137protected:
3138#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3139  /// Print the recipe.
3140  void printRecipe(raw_ostream &O, const Twine &Indent,
3141                   VPSlotTracker &SlotTracker) const override;
3142#endif
3143};
3144
3145/// A recipe to represent inloop reduction operations with vector-predication
3146/// intrinsics, performing a reduction on a vector operand with the explicit
3147/// vector length (EVL) into a scalar value, and adding the result to a chain.
3148/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3150public:
  /// Build an EVL reduction from an existing reduction recipe \p R, adding
  /// \p EVL as the third operand. The style is forced to in-loop, preserving
  /// \p R's orderedness, with no VF scaling.
3153      : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3154                          R.getFastMathFlags(),
3156                          {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3157                          getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3158                          DL) {}
3159
3160  ~VPReductionEVLRecipe() override = default;
3161
  /// Cloning is intentionally unimplemented for EVL recipes.
3163    llvm_unreachable("cloning not implemented yet");
3164  }
3165
3166  VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3167
3168  /// Generate the reduction in the loop
3169  void execute(VPTransformState &State) override;
3170
3171  /// The VPValue of the explicit vector length.
3172  VPValue *getEVL() const { return getOperand(2); }
3173
3174  /// Returns true if the recipe only uses the first lane of operand \p Op.
3175  bool usesFirstLaneOnly(const VPValue *Op) const override {
3177           "Op must be an operand of the recipe");
3178    return Op == getEVL();
3179  }
3180
3181protected:
3182#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3183  /// Print the recipe.
3184  void printRecipe(raw_ostream &O, const Twine &Indent,
3185                   VPSlotTracker &SlotTracker) const override;
3186#endif
3187};
3188
3189/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3190/// copies of the original scalar type, one per lane, instead of producing a
3191/// single copy of widened type for all lanes. If the instruction is known to be
3192/// a single scalar, only one copy will be generated.
3194                          public VPIRMetadata {
3195  /// Indicator if only a single replica per lane is needed.
3196  bool IsSingleScalar;
3197
3198  /// Indicator if the replicas are also predicated.
3199  bool IsPredicated;
3200
3201public:
  /// Construct a replicating recipe for \p I. When \p Mask is non-null it is
  /// appended as the last operand and the recipe becomes predicated.
3203                    bool IsSingleScalar, VPValue *Mask = nullptr,
3204                    const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3205                    DebugLoc DL = DebugLoc::getUnknown())
3206      : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3207        VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3208        IsPredicated(Mask) {
3209    setUnderlyingValue(I);
3210    if (Mask)
3211      addOperand(Mask);
3212  }
3213
3214  ~VPReplicateRecipe() override = default;
3215
  /// Clone the recipe and explicitly copy over the IR flags.
3217    auto *Copy = new VPReplicateRecipe(
3218        getUnderlyingInstr(), operands(), IsSingleScalar,
3219        isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3220    Copy->transferFlags(*this);
3221    return Copy;
3222  }
3223
3224  VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3225
3226  /// Generate replicas of the desired Ingredient. Replicas will be generated
3227  /// for all parts and lanes unless a specific part and lane are specified in
3228  /// the \p State.
3229  void execute(VPTransformState &State) override;
3230
3231  /// Return the cost of this VPReplicateRecipe.
3232  InstructionCost computeCost(ElementCount VF,
3233                              VPCostContext &Ctx) const override;
3234
  /// Return true if only one replica (not one per lane) is generated.
3235  bool isSingleScalar() const { return IsSingleScalar; }
3236
  /// Return true if the replicas execute under a mask.
3237  bool isPredicated() const { return IsPredicated; }
3238
3239  /// Returns true if the recipe only uses the first lane of operand \p Op.
3240  bool usesFirstLaneOnly(const VPValue *Op) const override {
3242           "Op must be an operand of the recipe");
3243    return isSingleScalar();
3244  }
3245
3246  /// Returns true if the recipe uses scalars of operand \p Op.
3247  bool usesScalars(const VPValue *Op) const override {
3249           "Op must be an operand of the recipe");
3250    return true;
3251  }
3252
3253  /// Return the mask of a predicated VPReplicateRecipe.
  // The mask, when present, is always appended last in the constructor.
3255    assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3256    return getOperand(getNumOperands() - 1);
3257  }
3258
  /// Opcode of the IR instruction being replicated.
3259  unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3260
3261protected:
3262#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3263  /// Print the recipe.
3264  void printRecipe(raw_ostream &O, const Twine &Indent,
3265                   VPSlotTracker &SlotTracker) const override;
3266#endif
3267};
3268
3269/// A recipe for generating conditional branches on the bits of a mask.
3271public:
  /// The single operand is the block-in mask to branch on.
3273      : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3274
3277  }
3278
3279  VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3280
3281  /// Generate the extraction of the appropriate bit from the block mask and the
3282  /// conditional branch.
3283  void execute(VPTransformState &State) override;
3284
3285  /// Return the cost of this VPBranchOnMaskRecipe.
3286  InstructionCost computeCost(ElementCount VF,
3287                              VPCostContext &Ctx) const override;
3288
3289#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3290  /// Print the recipe.
3291  void printRecipe(raw_ostream &O, const Twine &Indent,
3292                   VPSlotTracker &SlotTracker) const override {
3293    O << Indent << "BRANCH-ON-MASK ";
3295  }
3296#endif
3297
3298  /// Returns true if the recipe uses scalars of operand \p Op.
3299  bool usesScalars(const VPValue *Op) const override {
3301           "Op must be an operand of the recipe");
3302    return true;
3303  }
3304};
3305
3306/// A recipe to combine multiple recipes into a single 'expression' recipe,
3307/// which should be considered a single entity for cost-modeling and transforms.
3308/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3309/// expression recipes, before execute. The individual expression recipes are
3310/// completely disconnected from the def-use graph of other recipes not part of
3311/// the expression. Def-use edges between pairs of expression recipes remain
3312/// intact, whereas every edge between an expression recipe and a recipe outside
3313/// the expression is elevated to connect the non-expression recipe with the
3314/// VPExpressionRecipe itself.
3315class VPExpressionRecipe : public VPSingleDefRecipe {
3316  /// Recipes included in this VPExpressionRecipe. This could contain
3317  /// duplicates.
3318  SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3319
3320  /// Temporary VPValues used for external operands of the expression, i.e.
3321  /// operands not defined by recipes in the expression.
3322  SmallVector<VPValue *> LiveInPlaceholders;
3323
3324  enum class ExpressionTypes {
3325    /// Represents an inloop extended reduction operation, performing a
3326    /// reduction on an extended vector operand into a scalar value, and adding
3327    /// the result to a chain.
3328    ExtendedReduction,
3329    /// Represent an inloop multiply-accumulate reduction, multiplying the
3330    /// extended vector operands, performing a reduction.add on the result, and
3331    /// adding the scalar result to a chain.
3332    ExtMulAccReduction,
3333    /// Represent an inloop multiply-accumulate reduction, multiplying the
3334    /// vector operands, performing a reduction.add on the result, and adding
3335    /// the scalar result to a chain.
3336    MulAccReduction,
3337    /// Represent an inloop multiply-accumulate reduction, multiplying the
3338    /// extended vector operands, negating the multiplication, performing a
3339    /// reduction.add on the result, and adding the scalar result to a chain.
3340    ExtNegatedMulAccReduction,
3341  };
3342
3343  /// Type of the expression.
3344  ExpressionTypes ExpressionType;
3345
3346  /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3347  /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3348  /// in the expression) are replaced by temporary VPValues and the original
3349  /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3350  /// as needed (excluding last) to ensure they are only used by other recipes
3351  /// in the expression.
3352  VPExpressionRecipe(ExpressionTypes ExpressionType,
3353                     ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3354
3355public:
  /// Public constructors select the expression type from the bundled recipes.
3357      : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3359      : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3362      : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3363                           {Ext0, Ext1, Mul, Red}) {}
3366                     VPReductionRecipe *Red)
3367      : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3368                           {Ext0, Ext1, Mul, Sub, Red}) {
3369    assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3370    assert(Red->getRecurrenceKind() == RecurKind::Add &&
3371           "Expected an add reduction");
3372    assert(getNumOperands() >= 3 && "Expected at least three operands");
3373    [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3374    assert(SubConst && SubConst->isZero() &&
3375           Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3376  }
3377
  /// Destructor deletes owned recipes; the seen-set guards against
  /// double-deletes since ExpressionRecipes may contain duplicates.
3379    SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3380    for (auto *R : reverse(ExpressionRecipes)) {
3381      if (ExpressionRecipesSeen.insert(R).second)
3382        delete R;
3383    }
3384    for (VPValue *T : LiveInPlaceholders)
3385      delete T;
3386  }
3387
3388  VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3389
  /// Clone all bundled recipes, rewire their internal def-use edges to the
  /// clones, and point placeholder operands back at the external operands so
  /// the private constructor can re-internalize them.
3390  VPExpressionRecipe *clone() override {
3391    assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3392    SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3393    for (auto *R : ExpressionRecipes)
3394      NewExpressiondRecipes.push_back(R->clone());
3395    for (auto *New : NewExpressiondRecipes) {
3396      for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3397        New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3398      // Update placeholder operands in the cloned recipe to use the external
3399      // operands, to be internalized when the cloned expression is constructed.
3400      for (const auto &[Placeholder, OutsideOp] :
3401           zip(LiveInPlaceholders, operands()))
3402        New->replaceUsesOfWith(Placeholder, OutsideOp);
3403    }
3404    return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3405  }
3406
3407  /// Return the VPValue to use to infer the result type of the recipe.
  // Skip the trailing condition operand of a conditional reduction.
3409    unsigned OpIdx =
3410        cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3411                                                                           : 1;
3412    return getOperand(getNumOperands() - OpIdx);
3413  }
3414
3415  /// Insert the recipes of the expression back into the VPlan, directly before
3416  /// the current recipe. Leaves the expression recipe empty, which must be
3417  /// removed before codegen.
3418  void decompose();
3419
  /// VF scale factor of the final reduction, or 1 if it is not a reduction.
3420  unsigned getVFScaleFactor() const {
3421    auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3422    return PR ? PR->getVFScaleFactor() : 1;
3423  }
3424
3425  /// Method for generating code, must not be called as this recipe is abstract.
3426  void execute(VPTransformState &State) override {
3427    llvm_unreachable("recipe must be removed before execute");
3428  }
3429
3431                              VPCostContext &Ctx) const override;
3432
3433  /// Returns true if this expression contains recipes that may read from or
3434  /// write to memory.
3435  bool mayReadOrWriteMemory() const;
3436
3437  /// Returns true if this expression contains recipes that may have side
3438  /// effects.
3439  bool mayHaveSideEffects() const;
3440
3441  /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3442  bool isSingleScalar() const;
3443
3444protected:
3445#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3446  /// Print the recipe.
3447  void printRecipe(raw_ostream &O, const Twine &Indent,
3448                   VPSlotTracker &SlotTracker) const override;
3449#endif
3450};
3451
3452/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3453/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3454/// order to merge values that are set under such a branch and feed their uses.
3455/// The phi nodes can be scalar or vector depending on the users of the value.
3456/// This recipe works in concert with VPBranchOnMaskRecipe.
3458public:
3459  /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
3460  /// nodes after merging back from a Branch-on-Mask.
3462      : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV, DL) {}
3463  ~VPPredInstPHIRecipe() override = default;
3464
  /// Clone from the single predicated-value operand.
3466    return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3467  }
3468
3469  VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3470
3471  /// Generates phi nodes for live-outs (from a replicate region) as needed to
3472  /// retain SSA form.
3473  void execute(VPTransformState &State) override;
3474
3475  /// Return the cost of this VPPredInstPHIRecipe.
3477                              VPCostContext &Ctx) const override {
3478    // TODO: Compute accurate cost after retiring the legacy cost model.
3479    return 0;
3480  }
3481
3482protected:
3483#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3484  /// Print the recipe.
3485  void printRecipe(raw_ostream &O, const Twine &Indent,
3486                   VPSlotTracker &SlotTracker) const override;
3487#endif
3488};
3489
3490/// A common base class for widening memory operations. An optional mask can be
3491/// provided as the last operand.
3493                            public VPIRMetadata {
3494protected:
3496
3497  /// Alignment information for this memory access.
3499
3500  /// Whether the accessed addresses are consecutive.
3502
3503  /// Whether the memory access is masked.
3504  bool IsMasked = false;
3505
  /// Append \p Mask (if non-null) as the last operand and mark the access
  /// masked. May be called at most once.
3506  void setMask(VPValue *Mask) {
3507    assert(!IsMasked && "cannot re-set mask");
3508    if (!Mask)
3509      return;
3510    addOperand(Mask);
3511    IsMasked = true;
3512  }
3513
  /// Shared constructor used by all widen-load/store subclasses; \p I is the
  /// underlying IR memory instruction (the "ingredient").
3514  VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
3515                      std::initializer_list<VPValue *> Operands,
3516                      bool Consecutive, const VPIRMetadata &Metadata,
3517                      DebugLoc DL)
3518      : VPRecipeBase(SC, Operands, DL), VPIRMetadata(Metadata), Ingredient(I),
3520
3521public:
  /// Base class cloning is unsupported; subclasses override where needed.
3523    llvm_unreachable("cloning not supported");
3524  }
3525
  /// Support isa/dyn_cast across all four widen memory recipe IDs.
3526  static inline bool classof(const VPRecipeBase *R) {
3527    return R->getVPRecipeID() == VPRecipeBase::VPWidenLoadSC ||
3528           R->getVPRecipeID() == VPRecipeBase::VPWidenStoreSC ||
3529           R->getVPRecipeID() == VPRecipeBase::VPWidenLoadEVLSC ||
3530           R->getVPRecipeID() == VPRecipeBase::VPWidenStoreEVLSC;
3531  }
3532
3533  static inline bool classof(const VPUser *U) {
3534    auto *R = dyn_cast<VPRecipeBase>(U);
3535    return R && classof(R);
3536  }
3537
3538  /// Return whether the loaded-from / stored-to addresses are consecutive.
3539  bool isConsecutive() const { return Consecutive; }
3540
3541  /// Return the address accessed by this recipe.
3542  VPValue *getAddr() const { return getOperand(0); }
3543
3544  /// Returns true if the recipe is masked.
3545  bool isMasked() const { return IsMasked; }
3546
3547  /// Return the mask used by this recipe. Note that a full mask is represented
3548  /// by a nullptr.
3549  VPValue *getMask() const {
3550    // Mask is optional and therefore the last operand.
3551    return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3552  }
3553
3554  /// Returns the alignment of the memory access.
3555  Align getAlign() const { return Alignment; }
3556
3557  /// Generate the wide load/store.
3558  void execute(VPTransformState &State) override {
3559    llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3560  }
3561
3562  /// Return the cost of this VPWidenMemoryRecipe.
3563  InstructionCost computeCost(ElementCount VF,
3564                              VPCostContext &Ctx) const override;
3565
3567};
3568
3569/// A recipe for widening load operations, using the address to load from and an
3570/// optional mask.
3572                          public VPRecipeValue {
  /// The recipe's own VPValue is tied to the underlying LoadInst result.
3574                    bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3575      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadSC, Load, {Addr},
3576                            Consecutive, Metadata, DL),
3577        VPRecipeValue(this, &Load) {
3578    setMask(Mask);
3579  }
3580
  /// Clone by re-running the constructor with this recipe's current state.
3583                                 getMask(), Consecutive, *this, getDebugLoc());
3584  }
3585
3586  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3587
3588  /// Generate a wide load or gather.
3589  void execute(VPTransformState &State) override;
3590
3591  /// Returns true if the recipe only uses the first lane of operand \p Op.
3592  bool usesFirstLaneOnly(const VPValue *Op) const override {
3594           "Op must be an operand of the recipe");
3595    // Widened, consecutive loads operations only demand the first lane of
3596    // their address.
3597    return Op == getAddr() && isConsecutive();
3598  }
3599
3600protected:
3601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3602  /// Print the recipe.
3603  void printRecipe(raw_ostream &O, const Twine &Indent,
3604                   VPSlotTracker &SlotTracker) const override;
3605#endif
3606};
3607
3608/// A recipe for widening load operations with vector-predication intrinsics,
3609/// using the address to load from, the explicit vector length and an optional
3610/// mask.
3612                             public VPRecipeValue {
  /// Build an EVL load from an existing widen-load recipe \p L, adding \p EVL
  /// as the second operand and replacing the mask.
3614                       VPValue *Mask)
3615      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenLoadEVLSC, L.getIngredient(),
3616                            {Addr, &EVL}, L.isConsecutive(), L,
3617                            L.getDebugLoc()),
3618        VPRecipeValue(this, &getIngredient()) {
3619    setMask(Mask);
3620  }
3621
3622  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3623
3624  /// Return the EVL operand.
3625  VPValue *getEVL() const { return getOperand(1); }
3626
3627  /// Generate the wide load or gather.
3628  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3629
3630  /// Return the cost of this VPWidenLoadEVLRecipe.
3632  computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3633
3634  /// Returns true if the recipe only uses the first lane of operand \p Op.
3635  bool usesFirstLaneOnly(const VPValue *Op) const override {
3637           "Op must be an operand of the recipe");
3638    // Widened loads only demand the first lane of EVL and consecutive loads
3639    // only demand the first lane of their address.
3640    return Op == getEVL() || (Op == getAddr() && isConsecutive());
3641  }
3642
3643protected:
3644#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3645  /// Print the recipe.
3646  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3647                                     VPSlotTracker &SlotTracker) const override;
3648#endif
3649};
3650
3651/// A recipe for widening store operations, using the stored value, the address
3652/// to store to and an optional mask.
  /// Operands are {Addr, StoredVal, [Mask]}; a store defines no VPValue.
3654  VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3655                     VPValue *Mask, bool Consecutive,
3656                     const VPIRMetadata &Metadata, DebugLoc DL)
3657      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreSC, Store,
3658                            {Addr, StoredVal}, Consecutive, Metadata, DL) {
3659    setMask(Mask);
3660  }
3661
  /// Clone by re-running the constructor with this recipe's current state.
3665                                  *this, getDebugLoc());
3666  }
3667
3668  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3669
3670  /// Return the value stored by this recipe.
3671  VPValue *getStoredValue() const { return getOperand(1); }
3672
3673  /// Generate a wide store or scatter.
3674  void execute(VPTransformState &State) override;
3675
3676  /// Returns true if the recipe only uses the first lane of operand \p Op.
3677  bool usesFirstLaneOnly(const VPValue *Op) const override {
3679           "Op must be an operand of the recipe");
3680    // Widened, consecutive stores only demand the first lane of their address,
3681    // unless the same operand is also stored.
3682    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3683  }
3684
3685protected:
3686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3687  /// Print the recipe.
3688  void printRecipe(raw_ostream &O, const Twine &Indent,
3689                   VPSlotTracker &SlotTracker) const override;
3690#endif
3691};
3692
3693/// A recipe for widening store operations with vector-predication intrinsics,
3694/// using the value to store, the address to store to, the explicit vector
3695/// length and an optional mask.
  /// Build an EVL store from an existing widen-store recipe \p S; operands are
  /// {Addr, StoredVal, EVL, [Mask]}.
3698                        VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3699      : VPWidenMemoryRecipe(VPRecipeBase::VPWidenStoreEVLSC, S.getIngredient(),
3700                            {Addr, StoredVal, &EVL}, S.isConsecutive(), S,
3701                            S.getDebugLoc()) {
3702    setMask(Mask);
3703  }
3704
3705  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3706
3707  /// Return the address accessed by this recipe.
3708  VPValue *getStoredValue() const { return getOperand(1); }
3709
3710  /// Return the EVL operand.
3711  VPValue *getEVL() const { return getOperand(2); }
3712
3713  /// Generate the wide store or scatter.
3714  LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3715
3716  /// Return the cost of this VPWidenStoreEVLRecipe.
3718  computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3719
3720  /// Returns true if the recipe only uses the first lane of operand \p Op.
3721  bool usesFirstLaneOnly(const VPValue *Op) const override {
3723           "Op must be an operand of the recipe");
3724    if (Op == getEVL()) {
3725      assert(getStoredValue() != Op && "unexpected store of EVL");
3726      return true;
3727    }
3728    // Widened, consecutive memory operations only demand the first lane of
3729    // their address, unless the same operand is also stored. That latter can
3730    // happen with opaque pointers.
3731    return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3732  }
3733
3734protected:
3735#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3736  /// Print the recipe.
3737  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3738                                     VPSlotTracker &SlotTracker) const override;
3739#endif
3740};
3741
3742/// Recipe to expand a SCEV expression.
  /// The SCEV to expand; expansion happens as a VPlan transform, not in
  /// execute().
3744  const SCEV *Expr;
3745
3746public:
3748      : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}), Expr(Expr) {}
3749
3750  ~VPExpandSCEVRecipe() override = default;
3751
3752  VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3753
3754  VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3755
  /// Must not be reached: the recipe is replaced by the expanded value before
  /// VPlan execution.
3756  void execute(VPTransformState &State) override {
3757    llvm_unreachable("SCEV expressions must be expanded before final execute");
3758  }
3759
3760  /// Return the cost of this VPExpandSCEVRecipe.
3762                              VPCostContext &Ctx) const override {
3763    // TODO: Compute accurate cost after retiring the legacy cost model.
3764    return 0;
3765  }
3766
  /// Return the SCEV expression to expand.
3767  const SCEV *getSCEV() const { return Expr; }
3768
3769protected:
3770#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3771  /// Print the recipe.
3772  void printRecipe(raw_ostream &O, const Twine &Indent,
3773                   VPSlotTracker &SlotTracker) const override;
3774#endif
3775};
3776
3777/// A recipe for generating the active lane mask for the vector loop that is
3778/// used to predicate the vector operations.
3780public:
  /// Header phi over the lane mask; starts at \p StartMask. The backedge
  /// (second) operand may be added later.
3782      : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3783                          StartMask, DL) {}
3784
3785  ~VPActiveLaneMaskPHIRecipe() override = default;
3786
  /// Clone, carrying over the optional backedge operand when present.
3789    if (getNumOperands() == 2)
3790      R->addOperand(getOperand(1));
3791    return R;
3792  }
3793
3794  VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3795
3796  /// Generate the active lane mask phi of the vector loop.
3797  void execute(VPTransformState &State) override;
3798
3799protected:
3800#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3801  /// Print the recipe.
3802  void printRecipe(raw_ostream &O, const Twine &Indent,
3803                   VPSlotTracker &SlotTracker) const override;
3804#endif
3805};
3806
3807/// A recipe for generating the phi node tracking the current scalar iteration
3808/// index. It starts at the start value of the canonical induction and gets
3809/// incremented by the number of scalar iterations processed by the vector loop
3810/// iteration. The increment does not have to be loop invariant.
3812public:
3814      : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3815                          StartIV, DL) {}
3816
3817  ~VPCurrentIterationPHIRecipe() override = default;
3818
  /// Cloning is intentionally unimplemented for this recipe.
3820    llvm_unreachable("cloning not implemented yet");
3821  }
3822
3823  VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3824
  /// Must not be reached: this recipe is lowered to a scalar phi recipe before
  /// execution.
3825  void execute(VPTransformState &State) override {
3826    llvm_unreachable("cannot execute this recipe, should be replaced by a "
3827                     "scalar phi recipe");
3828  }
3829
3830  /// Return the cost of this VPCurrentIterationPHIRecipe.
3832                              VPCostContext &Ctx) const override {
3833    // For now, match the behavior of the legacy cost model.
3834    return 0;
3835  }
3836
3837  /// Returns true if the recipe only uses the first lane of operand \p Op.
3838  bool usesFirstLaneOnly(const VPValue *Op) const override {
3840           "Op must be an operand of the recipe");
3841    return true;
3842  }
3843
3844protected:
3845#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3846  /// Print the recipe.
3847  LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3848                                     VPSlotTracker &SlotTracker) const override;
3849#endif
3850};
3851
3852/// A Recipe for widening the canonical induction variable of the vector loop.
// NOTE(review): the class-declaration, constructor-signature and clone()
// lines were lost in doc extraction — verify against upstream VPlan.h.
                                     public VPUnrollPartAccessor<1> {
3855public:
3857      : VPSingleDefRecipe(VPRecipeBase::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3858
3859  ~VPWidenCanonicalIVRecipe() override = default;
3864
3865  VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
3866
3867  /// Generate a canonical vector induction variable of the vector loop, with
3868  /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3869  /// step = <VF*UF, VF*UF, ..., VF*UF>.
3870  void execute(VPTransformState &State) override;
3871
3872  /// Return the cost of this VPWidenCanonicalIVPHIRecipe.
3874                              VPCostContext &Ctx) const override {
3875    // TODO: Compute accurate cost after retiring the legacy cost model.
3876    return 0;
3877  }
3878
3879  /// Return the canonical IV being widened.
3883
3884protected:
3885#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3886  /// Print the recipe.
3887  void printRecipe(raw_ostream &O, const Twine &Indent,
3888                   VPSlotTracker &SlotTracker) const override;
3889#endif
3890};
3891
3892/// A recipe for converting the input value \p IV value to the corresponding
3893/// value of an IV with different start and step values, using Start + IV *
3894/// Step.
// NOTE(review): class-declaration and several signature lines were lost in
// doc extraction — verify against upstream VPlan.h.
3896  /// Kind of the induction.
3898  /// If not nullptr, the floating point induction binary operator. Must be set
3899  /// for floating point inductions.
3900  const FPMathOperator *FPBinOp;
3901
3902public:
  // Convenience constructor deriving Kind/FPBinOp from an induction
  // descriptor; delegates to the explicit constructor below.
3904                    VPValue *CanonicalIV, VPValue *Step)
3906                          IndDesc.getKind(),
3907                          dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3908                          Start, CanonicalIV, Step) {}
3909
3911                    const FPMathOperator *FPBinOp, VPIRValue *Start,
3912                    VPValue *IV, VPValue *Step)
3913      : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step}),
3914        Kind(Kind), FPBinOp(FPBinOp) {}
3915
3916  ~VPDerivedIVRecipe() override = default;
3917
3919    return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3920                                 getStepValue());
3921  }
3922
3923  VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
3924
  // This recipe is expanded to concrete recipes before codegen; executing it
  // directly is a bug.
3925  void execute(VPTransformState &State) override {
3926    llvm_unreachable("Expected prior expansion of this recipe");
3927  }
3928
3929  /// Return the cost of this VPDerivedIVRecipe.
3931                              VPCostContext &Ctx) const override {
3932    // TODO: Compute accurate cost after retiring the legacy cost model.
3933    return 0;
3934  }
3935
  /// The scalar type of the derived IV, taken from its start value.
3936  Type *getScalarType() const { return getStartValue()->getType(); }
3937
  // Operand accessors: operand 0 = start, 1 = input IV, 2 = step.
3939  VPValue *getIndex() const { return getOperand(1); }
3940  VPValue *getStepValue() const { return getOperand(2); }
3941  const FPMathOperator *getFPBinOp() const { return FPBinOp; }
3943
3944  /// Returns true if the recipe only uses the first lane of operand \p Op.
3945  bool usesFirstLaneOnly(const VPValue *Op) const override {
3947           "Op must be an operand of the recipe");
3948    return true;
3949  }
3950
3951protected:
3952#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3953  /// Print the recipe.
3954  void printRecipe(raw_ostream &O, const Twine &Indent,
3955                   VPSlotTracker &SlotTracker) const override;
3956#endif
};
3958
3959/// A recipe for handling phi nodes of integer and floating-point inductions,
3960/// producing their scalar values. Before unrolling by UF the recipe represents
3961/// the VF*UF scalar values to be produced, or UF scalar values if only first
3962/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
3963/// operand StartIndex to all unroll parts except part 0, as the recipe
3964/// represents the VF scalar values (this number of values is taken from
3965/// State.VF rather than from the VF operand) starting at IV + StartIndex.
// NOTE(review): class-declaration and constructor-signature lines were lost in
// doc extraction — verify against upstream VPlan.h.
3967  Instruction::BinaryOps InductionOpcode;
3968
3969public:
3972                       DebugLoc DL)
3973      : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
3974                            FMFs, DL),
3975        InductionOpcode(Opcode) {}
3976
  // Convenience constructor: derives opcode and fast-math flags from the
  // induction descriptor (FMFs only when the induction binop is an FP op).
3978                       VPValue *Step, VPValue *VF,
3981            IV, Step, VF, IndDesc.getInductionOpcode(),
3982            dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3983                ? IndDesc.getInductionBinOp()->getFastMathFlags()
3984                : FastMathFlags(),
3985            DL) {}
3986
3987  ~VPScalarIVStepsRecipe() override = default;
3988
  // Clone body (signature line stripped); also carries over the optional
  // StartIndex operand added during unrolling.
3990    auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
3991                                           getOperand(2), InductionOpcode,
3993    if (VPValue *StartIndex = getStartIndex())
3994      NewR->setStartIndex(StartIndex);
3995    return NewR;
3996  }
3997
3998  VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
3999
4000  /// Generate the scalarized versions of the phi node as needed by their users.
4001  void execute(VPTransformState &State) override;
4002
4003  /// Return the cost of this VPScalarIVStepsRecipe.
4005                              VPCostContext &Ctx) const override {
4006    // TODO: Compute accurate cost after retiring the legacy cost model.
4007    return 0;
4008  }
4009
4010  VPValue *getStepValue() const { return getOperand(1); }
4011
4012  /// Return the number of scalars to produce per unroll part, used to compute
4013  /// StartIndex during unrolling.
4014  VPValue *getVFValue() const { return getOperand(2); }
4015
4016  /// Return the StartIndex, or null if known to be zero, valid only after
4017  /// unrolling.
4019    return getNumOperands() == 4 ? getOperand(3) : nullptr;
4020  }
4021
4022  /// Set or add the StartIndex operand.
4023  void setStartIndex(VPValue *StartIndex) {
4024    if (getNumOperands() == 4)
4025      setOperand(3, StartIndex);
4026    else
4027      addOperand(StartIndex);
4028  }
4029
4030  /// Returns true if the recipe only uses the first lane of operand \p Op.
4031  bool usesFirstLaneOnly(const VPValue *Op) const override {
4033           "Op must be an operand of the recipe");
4034    return true;
4035  }
4036
4037  Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4038
4039protected:
4040#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4041  /// Print the recipe.
4042  void printRecipe(raw_ostream &O, const Twine &Indent,
4043                   VPSlotTracker &SlotTracker) const override;
4044#endif
4045};
4046
4047/// Support casting from VPRecipeBase -> VPPhiAccessors.
// NOTE(review): the specialization header and the isPossible() body line were
// lost in doc extraction — verify against upstream VPlan.h.
4048template <>
4052  /// Used by isa.
4053  static inline bool isPossible(VPRecipeBase *R) {
4054    // TODO: include VPPredInstPHIRecipe too, once it implements VPPhiAccessors.
4056  }
4057
4058  /// Used by cast.
  // Dispatch on the recipe's SC id to the concrete phi-like recipe type, so
  // the cross-cast picks the correct VPPhiAccessors base subobject.
4060    switch (R->getVPRecipeID()) {
4061    case VPRecipeBase::VPInstructionSC:
4062      return cast<VPPhi>(R);
4063    case VPRecipeBase::VPIRInstructionSC:
4064      return cast<VPIRPhi>(R);
4065    case VPRecipeBase::VPWidenPHISC:
4066      return cast<VPWidenPHIRecipe>(R);
4067    default:
4068      return cast<VPHeaderPHIRecipe>(R);
4069    }
4070  }
4071
4072  /// Used by inherited doCastIfPossible to dyn_cast.
4073  static inline VPPhiAccessors *castFailed() { return nullptr; }
4074};
4075
// NOTE(review): the struct headers of these two forwarding specializations
// were lost in doc extraction — verify against upstream VPlan.h.
4076template <>
4081template <>
4083    : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4084                                  CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4085
4086/// Support casting from VPRecipeBase -> VPIRMetadata.
// NOTE(review): the specialization header and parts of isPossible() were lost
// in doc extraction — verify against upstream VPlan.h.
4087template <>
4091  /// Used by isa.
4092  static inline bool isPossible(VPRecipeBase *R) {
4093    // NOTE: Each recipe inheriting from VPIRMetadata must be listed here.
4098        R);
4099  }
4100
4101  /// Used by cast.
  // Dispatch on the recipe's SC id so the cross-cast selects the correct
  // VPIRMetadata base subobject of the concrete recipe type.
4102  static inline VPIRMetadata *doCast(VPRecipeBase *R) {
4103    switch (R->getVPRecipeID()) {
4104    case VPRecipeBase::VPInstructionSC:
4105      return cast<VPInstruction>(R);
4106    case VPRecipeBase::VPWidenSC:
4107      return cast<VPWidenRecipe>(R);
4108    case VPRecipeBase::VPWidenCastSC:
4109      return cast<VPWidenCastRecipe>(R);
4110    case VPRecipeBase::VPWidenIntrinsicSC:
4112    case VPRecipeBase::VPWidenCallSC:
4113      return cast<VPWidenCallRecipe>(R);
4114    case VPRecipeBase::VPReplicateSC:
4115      return cast<VPReplicateRecipe>(R);
4116    case VPRecipeBase::VPInterleaveSC:
4117    case VPRecipeBase::VPInterleaveEVLSC:
4118      return cast<VPInterleaveBase>(R);
4119    case VPRecipeBase::VPWidenLoadSC:
4120    case VPRecipeBase::VPWidenLoadEVLSC:
4121    case VPRecipeBase::VPWidenStoreSC:
4122    case VPRecipeBase::VPWidenStoreEVLSC:
4123      return cast<VPWidenMemoryRecipe>(R);
4124    default:
4125      llvm_unreachable("Illegal recipe for VPIRMetadata cast");
4126    }
4127  }
4128
4129  /// Used by inherited doCastIfPossible to dyn_cast.
4130  static inline VPIRMetadata *castFailed() { return nullptr; }
4131};
4132
// NOTE(review): the struct headers of these two forwarding specializations
// were lost in doc extraction — verify against upstream VPlan.h.
4133template <>
4138template <>
4140    : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4141                                  CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4142
4143/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4144/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4145/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4146class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4147  friend class VPlan;
4148
4149  /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4150  VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4151      : VPBlockBase(VPBasicBlockSC, Name.str()) {
4152    if (Recipe)
4153      appendRecipe(Recipe);
4154  }
4155
4156public:
  // NOTE(review): the RecipeListTy typedef line was lost in doc extraction.
4158
4159protected:
4160  /// The VPRecipes held in the order of output instructions to generate.
  // NOTE(review): the Recipes member declaration line was lost in doc
  // extraction — verify against upstream VPlan.h.
4162
4163  VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4164      : VPBlockBase(BlockSC, Name.str()) {}
4165
4166public:
  // Pop recipes one by one so each recipe is unlinked before destruction.
4167  ~VPBasicBlock() override {
4168    while (!Recipes.empty())
4169      Recipes.pop_back();
4170  }
4171
4172  /// Instruction iterators...
4177
4178  //===--------------------------------------------------------------------===//
4179  /// Recipe iterator methods
4180  ///
4181  inline iterator begin() { return Recipes.begin(); }
4182  inline const_iterator begin() const { return Recipes.begin(); }
4183  inline iterator end() { return Recipes.end(); }
4184  inline const_iterator end() const { return Recipes.end(); }
4185
4186  inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4187  inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4188  inline reverse_iterator rend() { return Recipes.rend(); }
4189  inline const_reverse_iterator rend() const { return Recipes.rend(); }
4190
4191  inline size_t size() const { return Recipes.size(); }
4192  inline bool empty() const { return Recipes.empty(); }
4193  inline const VPRecipeBase &front() const { return Recipes.front(); }
4194  inline VPRecipeBase &front() { return Recipes.front(); }
4195  inline const VPRecipeBase &back() const { return Recipes.back(); }
4196  inline VPRecipeBase &back() { return Recipes.back(); }
4197
4198  /// Returns a reference to the list of recipes.
4200
4201  /// Returns a pointer to a member of the recipe list.
4202  static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4203    return &VPBasicBlock::Recipes;
4204  }
4205
4206  /// Method to support type inquiry through isa, cast, and dyn_cast.
4207  static inline bool classof(const VPBlockBase *V) {
4208    return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4209           V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4210  }
4211
  /// Insert \p Recipe before \p InsertPt, taking ownership into this block.
4212  void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4213    assert(Recipe && "No recipe to append.");
4214    assert(!Recipe->Parent && "Recipe already in VPlan");
4215    Recipe->Parent = this;
4216    Recipes.insert(InsertPt, Recipe);
4217  }
4218
4219  /// Augment the existing recipes of a VPBasicBlock with an additional
4220  /// \p Recipe as the last recipe.
4221  void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4222
4223  /// The method which generates the output IR instructions that correspond to
4224  /// this VPBasicBlock, thereby "executing" the VPlan.
4225  void execute(VPTransformState *State) override;
4226
4227  /// Return the cost of this VPBasicBlock.
4228  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4229
4230  /// Return the position of the first non-phi node recipe in the block.
4231  iterator getFirstNonPhi();
4232
4233  /// Returns an iterator range over the PHI-like recipes in the block.
4237
4238  /// Split current block at \p SplitAt by inserting a new block between the
4239  /// current block and its successors and moving all recipes starting at
4240  /// SplitAt to the new block. Returns the new block.
4241  VPBasicBlock *splitAt(iterator SplitAt);
4242
4243  VPRegionBlock *getEnclosingLoopRegion();
4244  const VPRegionBlock *getEnclosingLoopRegion() const;
4245
4246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4247  /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4248  /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4249  ///
4250  /// Note that the numbering is applied to the whole VPlan, so printing
4251  /// individual blocks is consistent with the whole VPlan printing.
4252  void print(raw_ostream &O, const Twine &Indent,
4253             VPSlotTracker &SlotTracker) const override;
4254  using VPBlockBase::print; // Get the print(raw_stream &O) version.
4255#endif
4256
4257  /// If the block has multiple successors, return the branch recipe terminating
4258  /// the block. If there are no or only a single successor, return nullptr;
4259  VPRecipeBase *getTerminator();
4260  const VPRecipeBase *getTerminator() const;
4261
4262  /// Returns true if the block is exiting its parent region.
4263  bool isExiting() const;
4264
4265  /// Clone the current block and its recipes, without updating the operands of
4266  /// the cloned recipes.
4267  VPBasicBlock *clone() override;
4268
4269  /// Returns the predecessor block at index \p Idx with the predecessors as per
4270  /// the corresponding plain CFG. If the block is an entry block to a region,
4271  /// the first predecessor is the single predecessor of a region, and the
4272  /// second predecessor is the exiting block of the region.
4273  const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4274
4275protected:
4276  /// Execute the recipes in the IR basic block \p BB.
4277  void executeRecipes(VPTransformState *State, BasicBlock *BB);
4278
4279  /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
4280  /// generated for this VPBB.
4281  void connectToPredecessors(VPTransformState &State);
4282
4283private:
4284  /// Create an IR BasicBlock to hold the output instructions generated by this
4285  /// VPBasicBlock, and return it. Update the CFGState accordingly.
4286  BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4287};
4288
// NOTE(review): the remainder of this inline definition's signature was lost
// in doc extraction; it forwards to the parent block's getCFGPredecessor().
4289inline const VPBasicBlock *
4291  return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4292}
4293
4294/// A special type of VPBasicBlock that wraps an existing IR basic block.
4295/// Recipes of the block get added before the first non-phi instruction in the
4296/// wrapped block.
4297/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4298/// preheader block.
4299class VPIRBasicBlock : public VPBasicBlock {
4300  friend class VPlan;
4301
  /// The underlying IR basic block this VPIRBasicBlock wraps.
4302  BasicBlock *IRBB;
4303
4304  /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
4305  VPIRBasicBlock(BasicBlock *IRBB)
4306      : VPBasicBlock(VPIRBasicBlockSC,
4307                     (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4308        IRBB(IRBB) {}
4309
4310public:
4311  ~VPIRBasicBlock() override = default;
4312
  /// Method to support type inquiry through isa, cast, and dyn_cast.
4313  static inline bool classof(const VPBlockBase *V) {
4314    return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4315  }
4316
4317  /// The method which generates the output IR instructions that correspond to
4318  /// this VPBasicBlock, thereby "executing" the VPlan.
4319  void execute(VPTransformState *State) override;
4320
4321  VPIRBasicBlock *clone() override;
4322
  /// Return the wrapped IR basic block.
4323  BasicBlock *getIRBasicBlock() const { return IRBB; }
4324};
4325
4326/// Track information about the canonical IV value of a region.
4327/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
// NOTE(review): the class-declaration and constructor-signature lines were
// lost in doc extraction — verify against upstream VPlan.h.
4329  /// VPRegionValue for the canonical IV, whose allocation is managed by
4330  /// VPCanonicalIVInfo.
4331  std::unique_ptr<VPRegionValue> CanIV;
4332
4333  /// Whether the increment of the canonical IV may unsigned wrap or not.
4334  bool HasNUW = true;
4335
4336public:
4338      : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4339
  /// Accessors for the owned canonical-IV region value.
4340  VPRegionValue *getRegionValue() { return CanIV.get(); }
4341  const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4342
4343  bool hasNUW() const { return HasNUW; }
4344
  /// Mark the canonical IV increment as possibly wrapping (drop NUW).
4345  void clearNUW() { HasNUW = false; }
4346};
4347
4348/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4349/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4350/// A VPRegionBlock may indicate that its contents are to be replicated several
4351/// times. This is designed to support predicated scalarization, in which a
4352/// scalar if-then code structure needs to be generated VF * UF times. Having
4353/// this replication indicator helps to keep a single model for multiple
4354/// candidate VF's. The actual replication takes place only once the desired VF
4355/// and UF have been determined.
4356class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4357  friend class VPlan;
4358
4359  /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4360  VPBlockBase *Entry;
4361
4362  /// Hold the Single Exiting block of the SESE region modelled by the
4363  /// VPRegionBlock.
4364  VPBlockBase *Exiting;
4365
4366  /// Holds the Canonical IV of the loop region along with additional
4367  /// information. If CanIVInfo is nullptr, the region is a replicating region.
4368  /// Loop regions retain their canonical IVs until they are dissolved, even if
4369  /// the canonical IV has no users.
4370  std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4371
4372  /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4373  /// VPRegionBlocks.
4374  VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4375                const std::string &Name = "")
4376      : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4377    if (Entry) {
4378      assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4379      assert(Exiting && "Must also pass Exiting if Entry is passed.");
4380      assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4381      Entry->setParent(this);
4382      Exiting->setParent(this);
4383    }
4384  }
4385
  /// Loop-region constructor: additionally creates the canonical IV info,
  /// which makes isReplicator() return false for this region.
4386  VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4387                VPBlockBase *Exiting, const std::string &Name = "")
4388      : VPRegionBlock(Entry, Exiting, Name) {
4389    CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4390  }
4391
4392public:
4393  ~VPRegionBlock() override = default;
4394
4395  /// Method to support type inquiry through isa, cast, and dyn_cast.
4396  static inline bool classof(const VPBlockBase *V) {
4397    return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4398  }
4399
4400  const VPBlockBase *getEntry() const { return Entry; }
4401  VPBlockBase *getEntry() { return Entry; }
4402
4403  /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4404  /// EntryBlock must have no predecessors.
4405  void setEntry(VPBlockBase *EntryBlock) {
4406    assert(!EntryBlock->hasPredecessors() &&
4407           "Entry block cannot have predecessors.");
4408    Entry = EntryBlock;
4409    EntryBlock->setParent(this);
4410  }
4411
4412  const VPBlockBase *getExiting() const { return Exiting; }
4413  VPBlockBase *getExiting() { return Exiting; }
4414
4415  /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4416  /// ExitingBlock must have no successors.
4417  void setExiting(VPBlockBase *ExitingBlock) {
4418    assert(!ExitingBlock->hasSuccessors() &&
4419           "Exit block cannot have successors.");
4420    Exiting = ExitingBlock;
4421    ExitingBlock->setParent(this);
4422  }
4423
4424  /// Returns the pre-header VPBasicBlock of the loop region.
  // NOTE(review): the signature line of this accessor was lost in doc
  // extraction — verify against upstream VPlan.h.
4426    assert(!isReplicator() && "should only get pre-header of loop regions");
4427    return getSinglePredecessor()->getExitingBasicBlock();
4428  }
4429
4430  /// An indicator whether this region is to generate multiple replicated
4431  /// instances of output IR corresponding to its VPBlockBases.
4432  bool isReplicator() const { return !CanIVInfo; }
4433
4434  /// The method which generates the output IR instructions that correspond to
4435  /// this VPRegionBlock, thereby "executing" the VPlan.
4436  void execute(VPTransformState *State) override;
4437
4438  // Return the cost of this region.
4439  InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4440
4441#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4442  /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4443  /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4444  /// consecutive numbers.
4445  ///
4446  /// Note that the numbering is applied to the whole VPlan, so printing
4447  /// individual regions is consistent with the whole VPlan printing.
4448  void print(raw_ostream &O, const Twine &Indent,
4449             VPSlotTracker &SlotTracker) const override;
4450  using VPBlockBase::print; // Get the print(raw_stream &O) version.
4451#endif
4452
4453  /// Clone all blocks in the single-entry single-exit region of the block and
4454  /// their recipes without updating the operands of the cloned recipes.
4455  VPRegionBlock *clone() override;
4456
4457  /// Remove the current region from its VPlan, connecting its predecessor to
4458  /// its entry, and its exiting block to its successor.
4459  void dissolveToCFGLoop();
4460
4461  /// Get the canonical IV increment instruction if it exists. Otherwise, create
4462  /// a new increment before the terminator and return it. The canonical IV
4463  /// increment is subject to DCE if unused, unlike the canonical IV itself.
4464  VPInstruction *getOrCreateCanonicalIVIncrement();
4465
4466  /// Return the canonical induction variable of the region, null for
4467  /// replicating regions.
4469    return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4470  }
4472    return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4473  }
4474
4475  /// Return the type of the canonical IV for loop regions.
4477    return CanIVInfo->getRegionValue()->getType();
4478  }
4479
4480  /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4481  bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4482
4483  /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4485    assert(Increment && "Must provide increment to clear");
4486    Increment->dropPoisonGeneratingFlags();
4487    CanIVInfo->clearNUW();
4488  }
4489};
4490
// NOTE(review): the signatures of these two inline forwarders were lost in
// doc extraction; each returns the grandparent via getParent()->getParent().
4492  return getParent()->getParent();
4493}
4494
4496  return getParent()->getParent();
4497}
4498
4499/// VPlan models a candidate for vectorization, encoding various decisions taken
4500/// to produce efficient output IR, including which branches, basic-blocks and
4501/// output IR instructions to generate, and their cost. VPlan holds a
4502/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4503/// VPBasicBlock.
4504class VPlan {
4505 friend class VPlanPrinter;
4506 friend class VPSlotTracker;
4507
4508 /// VPBasicBlock corresponding to the original preheader. Used to place
4509 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4510 /// rest of VPlan execution.
4511 /// When this VPlan is used for the epilogue vector loop, the entry will be
4512 /// replaced by a new entry block created during skeleton creation.
4513 VPBasicBlock *Entry;
4514
4515 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4516 VPIRBasicBlock *ScalarHeader;
4517
4518 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4519 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4520 /// e.g. if the scalar epilogue always executes.
4522
4523 /// Holds the VFs applicable to this VPlan.
4525
4526 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4527 /// any UF.
4529
4530 /// Holds the name of the VPlan, for printing.
4531 std::string Name;
4532
4533 /// Represents the trip count of the original loop, for folding
4534 /// the tail.
4535 VPValue *TripCount = nullptr;
4536
4537 /// Represents the backedge taken count of the original loop, for folding
4538 /// the tail. It equals TripCount - 1.
4539 VPSymbolicValue *BackedgeTakenCount = nullptr;
4540
4541 /// Represents the vector trip count.
4542 VPSymbolicValue VectorTripCount;
4543
4544 /// Represents the vectorization factor of the loop.
4545 VPSymbolicValue VF;
4546
4547 /// Represents the unroll factor of the loop.
4548 VPSymbolicValue UF;
4549
4550 /// Represents the loop-invariant VF * UF of the vector loop region.
4551 VPSymbolicValue VFxUF;
4552
4553 /// Contains all the external definitions created for this VPlan, as a mapping
4554 /// from IR Values to VPIRValues.
4556
4557 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4558 /// VPlan is destroyed.
4559 SmallVector<VPBlockBase *> CreatedBlocks;
4560
4561 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4562 /// wrapping the original header of the scalar loop. The vector loop will have
4563 /// index type \p IdxTy.
4564 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4565 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4566 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4567 Entry->setPlan(this);
4568 assert(ScalarHeader->getNumSuccessors() == 0 &&
4569 "scalar header must be a leaf node");
4570 }
4571
4572public:
4573 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4574 /// original preheader and scalar header of \p L, to be used as entry and
4575 /// scalar header blocks of the new VPlan. The vector loop will have index
4576 /// type \p IdxTy.
4577 VPlan(Loop *L, Type *IdxTy);
4578
4579 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4580 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4581 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4582 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4583 setEntry(createVPBasicBlock("preheader"));
4584 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4585 }
4586
4588
4590 Entry = VPBB;
4591 VPBB->setPlan(this);
4592 }
4593
4594 /// Generate the IR code for this VPlan.
4595 void execute(VPTransformState *State);
4596
4597 /// Return the cost of this plan.
4599
4600 VPBasicBlock *getEntry() { return Entry; }
4601 const VPBasicBlock *getEntry() const { return Entry; }
4602
4603 /// Returns the preheader of the vector loop region, if one exists, or null
4604 /// otherwise.
4606 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4607 return VectorRegion
4608 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4609 : nullptr;
4610 }
4611
4612 /// Returns the VPRegionBlock of the vector loop.
4615
4616 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4617 /// loop region contains a nested loop region.
4618 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4619
4620 /// Returns the 'middle' block of the plan, that is the block that selects
4621 /// whether to execute the scalar tail loop or the exit block from the loop
4622 /// latch. If there is an early exit from the vector loop, the middle block
4623/// conceptually has the early exit block as third successor, split across 2
4624 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4625 /// tail loop or the exit block. If the scalar tail loop or exit block are
4626 /// known to always execute, the middle block may branch directly to that
4627 /// block. This function cannot be called once the vector loop region has been
4628 /// removed.
4630 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4631 assert(
4632 LoopRegion &&
4633 "cannot call the function after vector loop region has been removed");
4634 // The middle block is always the last successor of the region.
4635 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4636 }
4637
4639 return const_cast<VPlan *>(this)->getMiddleBlock();
4640 }
4641
4642 /// Return the VPBasicBlock for the preheader of the scalar loop.
4645 getScalarHeader()->getSinglePredecessor());
4646 }
4647
4648 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4649 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4650
4651 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4652 /// the original scalar loop.
4653 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4654
4655 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4656 /// exit block.
4658
4659 /// Returns true if \p VPBB is an exit block.
4660 bool isExitBlock(VPBlockBase *VPBB);
4661
4662 /// The trip count of the original loop.
4664 assert(TripCount && "trip count needs to be set before accessing it");
4665 return TripCount;
4666 }
4667
4668 /// Set the trip count assuming it is currently null; if it is not - use
4669 /// resetTripCount().
4670 void setTripCount(VPValue *NewTripCount) {
4671 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4672 TripCount = NewTripCount;
4673 }
4674
4675 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4676 /// the original trip count have been replaced.
4677 void resetTripCount(VPValue *NewTripCount) {
4678 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4679 "TripCount must be set when resetting");
4680 TripCount = NewTripCount;
4681 }
4682
4683 /// The backedge taken count of the original loop.
4685 // BTC shares the canonical IV type with VectorTripCount.
4686 if (!BackedgeTakenCount)
4687 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4688 return BackedgeTakenCount;
4689 }
4690 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4691
4692 /// The vector trip count.
4693 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4694
4695 /// Returns the VF of the vector loop region.
4696 VPSymbolicValue &getVF() { return VF; };
4697 const VPSymbolicValue &getVF() const { return VF; };
4698
4699 /// Returns the UF of the vector loop region.
4700 VPSymbolicValue &getUF() { return UF; };
4701
4702 /// Returns VF * UF of the vector loop region.
4703 VPSymbolicValue &getVFxUF() { return VFxUF; }
4704
4707 }
4708
4709 const DataLayout &getDataLayout() const {
4711 }
4712
4713 void addVF(ElementCount VF) { VFs.insert(VF); }
4714
4716 assert(hasVF(VF) && "Cannot set VF not already in plan");
4717 VFs.clear();
4718 VFs.insert(VF);
4719 }
4720
4721 /// Remove \p VF from the plan.
4723 assert(hasVF(VF) && "tried to remove VF not present in plan");
4724 VFs.remove(VF);
4725 }
4726
4727 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4728 bool hasScalableVF() const {
4729 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4730 }
4731
4732 /// Returns an iterator range over all VFs of the plan.
4735 return VFs;
4736 }
4737
4738 /// Returns the single VF of the plan, asserting that the plan has exactly
4739 /// one VF.
4741 assert(VFs.size() == 1 && "expected plan with single VF");
4742 return VFs[0];
4743 }
4744
4745 bool hasScalarVFOnly() const {
4746 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4747 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4748 "Plan with scalar VF should only have a single VF");
4749 return HasScalarVFOnly;
4750 }
4751
4752 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4753
4754 /// Returns the concrete UF of the plan, after unrolling.
/// Must only be called once the plan has exactly one UF (see isUnrolled()).
4755 unsigned getConcreteUF() const {
4756 assert(UFs.size() == 1 && "Expected a single UF");
4757 return UFs[0];
4758 }
4759
/// Restrict the plan to the single unroll factor \p UF. \p UF must already
/// be valid for the plan (see hasUF()).
4760 void setUF(unsigned UF) {
4761 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4762 UFs.clear();
4763 UFs.insert(UF);
4764 }
4765
4766 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4767 /// concrete UF.
/// Note: a freshly-built plan with an empty UF set is not considered
/// unrolled.
4768 bool isUnrolled() const { return UFs.size() == 1; }
4769
4770 /// Return a string with the name of the plan and the applicable VFs and UFs.
4771 std::string getName() const;
4772
/// Set the name of the plan to \p newName.
4773 void setName(const Twine &newName) { Name = newName.str(); }
4774
4775 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4776 /// yet) for \p V.
4778 assert(V && "Trying to get or add the VPIRValue of a null Value");
4779 auto [It, Inserted] = LiveIns.try_emplace(V);
4780 if (Inserted) {
4781 if (auto *CI = dyn_cast<ConstantInt>(V))
4782 It->second = new VPConstantInt(CI);
4783 else
4784 It->second = new VPIRValue(V);
4785 }
4786
4787 assert(isa<VPIRValue>(It->second) &&
4788 "Only VPIRValues should be in mapping");
4789 return It->second;
4790 }
4792 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4793 return getOrAddLiveIn(V->getValue());
4794 }
4795
4796 /// Return a VPIRValue wrapping i1 true.
/// Implemented as a live-in ConstantInt with bitwidth 1 and value 1.
4797 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4798
4799 /// Return a VPIRValue wrapping i1 false.
/// Implemented as a live-in ConstantInt with bitwidth 1 and value 0.
4800 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4801
4802 /// Return a VPIRValue wrapping the null value of type \p Ty.
/// NOTE(review): forwards to getConstantInt, so \p Ty is presumably expected
/// to be an integer type — confirm with callers.
4803 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4804
4805 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4807 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4808 }
4809
4810 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
/// The constant is added to (or found among) the plan's live-ins.
4811 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4812 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4813 }
4814
4815 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4816 /// value.
4818 bool IsSigned = false) {
4819 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4820 }
4821
4822 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4824 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4825 }
4826
4827 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4828 /// otherwise.
/// Unlike getOrAddLiveIn, this never creates a new live-in.
4829 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4830
4831 /// Return the list of live-in VPValues available in the VPlan.
/// Iterates over the mapped values of the LiveIns mapping.
4832 auto getLiveIns() const { return LiveIns.values(); }
4833
4834#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4835 /// Print the live-ins of this VPlan to \p O.
4836 void printLiveIns(raw_ostream &O) const;
4837
4838 /// Print this VPlan to \p O.
4839 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4840
4841 /// Print this VPlan in DOT format to \p O.
4842 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4843
4844 /// Dump the plan to stderr (for debugging).
4845 LLVM_DUMP_METHOD void dump() const;
4846#endif
4847
4848 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4849 /// recipes to refer to the clones, and return it.
4851
4852 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4853 /// present. The returned block is owned by the VPlan and deleted once the
4854 /// VPlan is destroyed.
4856 VPRecipeBase *Recipe = nullptr) {
4857 auto *VPB = new VPBasicBlock(Name, Recipe);
4858 CreatedBlocks.push_back(VPB);
4859 return VPB;
4860 }
4861
4862 /// Create a new loop region with a canonical IV using \p CanIVTy and
4863 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
4864 /// to \p Entry and \p Exiting respectively, if provided. The returned block
4865 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4867 const std::string &Name = "",
4868 VPBlockBase *Entry = nullptr,
4869 VPBlockBase *Exiting = nullptr) {
4870 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
4871 CreatedBlocks.push_back(VPB);
4872 return VPB;
4873 }
4874
4875 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4876 /// returned block is owned by the VPlan and deleted once the VPlan is
4877 /// destroyed.
4879 const std::string &Name = "") {
4880 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
4881 CreatedBlocks.push_back(VPB);
4882 return VPB;
4883 }
4884
4885 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4886 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4887 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4889
4890 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4891 /// instructions in \p IRBB, except its terminator which is managed by the
4892 /// successors of the block in VPlan. The returned block is owned by the VPlan
4893 /// and deleted once the VPlan is destroyed.
4895
4896 /// Returns true if the VPlan is based on a loop with an early exit. That is
4897 /// the case if the VPlan has either more than one exit block or a single exit
4898 /// block with multiple predecessors (one for the exit via the latch and one
4899 /// via the other early exit).
4900 bool hasEarlyExit() const {
4901 return count_if(ExitBlocks,
4902 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
4903 1 ||
4904 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
4905 }
4906
4907 /// Returns true if the scalar tail may execute after the vector loop, i.e.
4908 /// if the middle block is a predecessor of the scalar preheader. Note that
4909 /// this relies on unneeded branches to the scalar tail loop being removed.
4910 bool hasScalarTail() const {
4911 auto *ScalarPH = getScalarPreheader();
4912 return ScalarPH &&
4913 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
4914 }
4915
4916 /// The type of the canonical induction variable of the vector loop.
/// Derived from the type of the symbolic VF value.
4917 Type *getIndexType() const { return VF.getType(); }
4918};
4919
4920#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Stream a plain-text dump of \p Plan to \p OS via VPlan::print.
4921inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
4922 Plan.print(OS);
4923 return OS;
4924}
4925#endif
4926
4927} // end namespace llvm
4928
4929#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:590
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3787
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3781
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4146
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4174
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4221
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4176
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4173
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4199
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4157
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4163
iterator end()
Definition VPlan.h:4183
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4181
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4175
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4234
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:754
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:233
~VPBasicBlock() override
Definition VPlan.h:4167
const_reverse_iterator rbegin() const
Definition VPlan.h:4187
reverse_iterator rend()
Definition VPlan.h:4188
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4161
VPRecipeBase & back()
Definition VPlan.h:4196
const VPRecipeBase & front() const
Definition VPlan.h:4193
const_iterator begin() const
Definition VPlan.h:4182
VPRecipeBase & front()
Definition VPlan.h:4194
const VPRecipeBase & back() const
Definition VPlan.h:4195
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4212
bool empty() const
Definition VPlan.h:4192
const_iterator end() const
Definition VPlan.h:4184
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4207
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4202
reverse_iterator rbegin()
Definition VPlan.h:4186
friend class VPlan
Definition VPlan.h:4147
size_t size() const
Definition VPlan.h:4191
const_reverse_iterator rend() const
Definition VPlan.h:4189
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2810
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2815
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2785
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2805
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2826
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2792
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2821
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2801
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:97
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:318
VPRegionBlock * getParent()
Definition VPlan.h:189
VPBlocksTy & getPredecessors()
Definition VPlan.h:226
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:223
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:388
void setName(const Twine &newName)
Definition VPlan.h:182
size_t getNumSuccessors() const
Definition VPlan.h:240
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:222
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:220
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:340
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:652
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:176
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:276
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:353
size_t getNumPredecessors() const
Definition VPlan.h:241
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:309
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:225
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:346
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:218
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:225
virtual VPBlockBase * clone()=0
Clone the current block and it's recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:174
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:197
const VPRegionBlock * getParent() const
Definition VPlan.h:190
const std::string & getName() const
Definition VPlan.h:180
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:328
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:266
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:300
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:236
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:260
void clearPredecessors()
Remove all the predecessor of this block.
Definition VPlan.h:325
friend class VPBlockUtils
Definition VPlan.h:98
unsigned getVPBlockID() const
Definition VPlan.h:187
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:367
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:332
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:166
VPBlocksTy & getSuccessors()
Definition VPlan.h:215
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:217
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:289
void setParent(VPRegionBlock *P)
Definition VPlan.h:200
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:282
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:230
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:214
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3291
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3275
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3299
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3272
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4340
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4337
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4341
bool hasNUW() const
Definition VPlan.h:4343
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3819
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3813
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3831
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3825
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3838
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:3942
VPValue * getIndex() const
Definition VPlan.h:3939
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:3941
VPIRValue * getStartValue() const
Definition VPlan.h:3938
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:3930
VPValue * getStepValue() const
Definition VPlan.h:3940
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3925
Type * getScalarType() const
Definition VPlan.h:3936
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3918
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:3910
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3945
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:3903
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3756
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3761
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlan.h:3747
const SCEV * getSCEV() const
Definition VPlan.h:3767
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3752
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3426
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3408
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3390
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3378
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3364
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3356
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3360
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3420
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3358
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2301
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2305
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2318
static bool classof(const VPValue *V)
Definition VPlan.h:2315
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2341
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2346
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2330
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2338
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2311
VPValue * getStartValue() const
Definition VPlan.h:2333
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2350
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2056
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2073
unsigned getOpcode() const
Definition VPlan.h:2069
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2049
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4299
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:462
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4323
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4313
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4300
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:487
Class to record and manage LLVM IR flags.
Definition VPlan.h:691
FastMathFlagsTy FMFs
Definition VPlan.h:779
ReductionFlagsTy ReductionFlags
Definition VPlan.h:781
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1034
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:872
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:852
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:838
WrapFlagsTy WrapFlags
Definition VPlan.h:773
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:831
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:996
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1060
TruncFlagsTy TruncFlags
Definition VPlan.h:774
CmpInst::Predicate getPredicate() const
Definition VPlan.h:968
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1044
uint8_t AllFlags[2]
Definition VPlan.h:782
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1004
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:877
ExactFlagsTy ExactFlags
Definition VPlan.h:776
bool hasNoSignedWrap() const
Definition VPlan.h:1023
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1048
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:843
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:848
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:857
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:826
uint8_t GEPFlagsStorage
Definition VPlan.h:777
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:862
bool isNonNeg() const
Definition VPlan.h:1006
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:986
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:991
DisjointFlagsTy DisjointFlags
Definition VPlan.h:775
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:976
bool hasNoUnsignedWrap() const
Definition VPlan.h:1012
FCmpFlagsTy FCmpFlags
Definition VPlan.h:780
NonNegFlagsTy NonNegFlags
Definition VPlan.h:778
bool isReductionInLoop() const
Definition VPlan.h:1066
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:888
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:925
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:867
uint8_t CmpPredStorage
Definition VPlan.h:772
RecurKind getRecurKind() const
Definition VPlan.h:1054
VPIRFlags(Instruction &I)
Definition VPlan.h:788
Instruction & getInstruction() const
Definition VPlan.h:1715
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1723
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1702
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1729
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1717
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1690
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1171
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1207
VPIRMetadata(Instruction &I)
Adds metatadata that can be preserved from the original instruction I.
Definition VPlan.h:1179
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1191
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1520
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1562
static bool classof(const VPUser *R)
Definition VPlan.h:1547
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1528
Type * getResultType() const
Definition VPlan.h:1568
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1551
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1226
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1452
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1472
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1393
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1332
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1323
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1339
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1313
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1326
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1266
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1317
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1261
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1258
@ VScale
Returns the value for vscale.
Definition VPlan.h:1335
@ CanonicalIVIncrementForPart
Definition VPlan.h:1242
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1269
bool hasResult() const
Definition VPlan.h:1417
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1475
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1457
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1497
unsigned getOpcode() const
Definition VPlan.h:1401
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1500
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1466
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1442
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:2914
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2920
static bool classof(const VPUser *U)
Definition VPlan.h:2896
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2863
Instruction * getInsertPos() const
Definition VPlan.h:2918
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2891
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:2916
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:2908
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:2937
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:2902
A recipe for interleaved memory operations with vector-predication intrinsics.
Definition VPlan.h:2989
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3017
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3011
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3024
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3004
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:2991
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:2947
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:2974
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2957
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2968
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2949
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1580
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an interator range over the incoming values.
Definition VPlan.h:1609
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1604
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4290
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1629
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1589
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1614
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1618
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3465
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3476
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi nodes after merging back from ...
Definition VPlan.h:3461
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:405
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:552
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4491
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:563
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:479
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:557
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:528
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:407
const VPBasicBlock * getParent() const
Definition VPlan.h:480
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:533
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:423
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:525
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:469
A VPValue defined by a recipe that produces one or more values.
Definition VPlanValue.h:309
LLVM_ABI_FOR_TEST VPRecipeValue(VPRecipeBase *Def, Value *UV=nullptr)
Definition VPlan.cpp:144
friend class VPValue
Definition VPlanValue.h:310
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3172
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3151
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3175
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3162
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2746
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2732
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2711
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2725
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true, if the phi is part of a multi-use reduction.
Definition VPlan.h:2758
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2740
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2699
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2749
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2763
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2755
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2743
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3040
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3049
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3114
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3083
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3098
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3125
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3127
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3110
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3063
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3112
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3069
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3116
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3123
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3118
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3077
static bool classof(const VPUser *U)
Definition VPlan.h:3088
static bool classof(const VPValue *VPV)
Definition VPlan.h:3093
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3132
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4356
const VPBlockBase * getEntry() const
Definition VPlan.h:4400
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4432
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4417
VPBlockBase * getExiting()
Definition VPlan.h:4413
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4471
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4405
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4476
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4481
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4484
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4468
const VPBlockBase * getExiting() const
Definition VPlan.h:4412
VPBlockBase * getEntry()
Definition VPlan.h:4401
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4425
friend class VPlan
Definition VPlan.h:4357
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4396
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:209
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3194
bool isSingleScalar() const
Definition VPlan.h:3235
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3202
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3247
bool isPredicated() const
Definition VPlan.h:3237
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3216
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3240
unsigned getOpcode() const
Definition VPlan.h:3259
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3254
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4037
VPValue * getStepValue() const
Definition VPlan.h:4010
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4004
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3977
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4023
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3989
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4018
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4014
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:3970
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4031
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:609
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:615
static bool classof(const VPValue *V)
Definition VPlan.h:663
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:676
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:619
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:679
static bool classof(const VPUser *U)
Definition VPlan.h:668
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:611
This class can be used to assign names to VPValues.
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition VPlan.h:1159
VPValue * getUnrollPartOperand(const VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(const VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:335
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1526
operand_range operands()
Definition VPlanValue.h:403
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:379
unsigned getNumOperands() const
Definition VPlanValue.h:373
operand_iterator op_end()
Definition VPlanValue.h:401
operand_iterator op_begin()
Definition VPlanValue.h:399
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:374
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:354
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:397
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:396
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:49
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:138
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:128
friend class VPRecipeValue
Definition VPlanValue.h:52
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:74
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:202
unsigned getNumUsers() const
Definition VPlanValue.h:113
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2182
VPValue * getVFValue() const
Definition VPlan.h:2171
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2168
int64_t getStride() const
Definition VPlan.h:2169
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2203
VPValue * getOffset() const
Definition VPlan.h:2172
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2196
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2158
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2189
VPValue * getPointer() const
Definition VPlan.h:2170
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2240
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2242
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2249
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2227
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHeaderPHIRecipe.
Definition VPlan.h:2265
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2256
A recipe for widening Call instructions using library calls.
Definition VPlan.h:1988
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1995
const_operand_range args() const
Definition VPlan.h:2029
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2010
operand_range args()
Definition VPlan.h:2028
Function * getCalledScalarFunction() const
Definition VPlan.h:2024
~VPWidenCallRecipe() override=default
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
~VPWidenCanonicalIVRecipe() override=default
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV)
Definition VPlan.h:3856
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVPHIRecipe.
Definition VPlan.h:3873
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:3880
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3861
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1836
Instruction::CastOps getOpcode() const
Definition VPlan.h:1874
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition VPlan.h:1877
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1844
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1859
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2120
Type * getSourceElementType() const
Definition VPlan.h:2125
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2128
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2112
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2098
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2433
static bool classof(const VPValue *V)
Definition VPlan.h:2381
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2400
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2418
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2393
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2408
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2411
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2369
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2396
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2416
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2425
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2376
const VPValue * getVFValue() const
Definition VPlan.h:2403
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2386
const VPValue * getStepValue() const
Definition VPlan.h:2397
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2494
const TruncInst * getTruncInst() const
Definition VPlan.h:2510
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2488
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2498
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2480
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2454
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2509
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2463
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2525
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2505
Type * getScalarType() const
Returns the scalar type of the induction.
Definition VPlan.h:2518
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1888
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1919
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1959
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:1968
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1905
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:1974
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1940
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:1971
~VPWidenIntrinsicRecipe() override=default
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition VPlan.h:1962
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3504
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3539
static bool classof(const VPUser *U)
Definition VPlan.h:3533
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition VPlan.h:3558
Instruction & Ingredient
Definition VPlan.h:3495
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3522
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3514
Instruction & getIngredient() const
Definition VPlan.h:3566
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3501
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3526
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3549
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3498
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3545
void setMask(VPValue *Mask)
Definition VPlan.h:3506
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3555
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3542
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2617
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2595
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2590
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2552
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2561
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2542
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1780
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1800
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1827
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1784
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1792
~VPWidenRecipe() override=default
unsigned getOpcode() const
Definition VPlan.h:1817
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4504
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4829
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1176
friend class VPSlotTracker
Definition VPlan.h:4506
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1152
bool hasVF(ElementCount VF) const
Definition VPlan.h:4727
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4740
const DataLayout & getDataLayout() const
Definition VPlan.h:4709
LLVMContext & getContext() const
Definition VPlan.h:4705
VPBasicBlock * getEntry()
Definition VPlan.h:4600
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:4917
void setName(const Twine &newName)
Definition VPlan.h:4773
bool hasScalableVF() const
Definition VPlan.h:4728
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4663
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4684
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4734
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:901
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:876
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4791
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:909
const VPBasicBlock * getEntry() const
Definition VPlan.h:4601
friend class VPlanPrinter
Definition VPlan.h:4505
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4800
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4823
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4703
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4806
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4878
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1311
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4832
bool hasUF(unsigned UF) const
Definition VPlan.h:4752
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4653
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4581
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4693
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4690
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4777
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:4866
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4803
void setVF(ElementCount VF)
Definition VPlan.h:4715
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4768
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1067
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:4900
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1049
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1082
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4755
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4817
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4638
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4670
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4677
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4629
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4589
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4855
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1317
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4722
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4797
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4605
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1182
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4700
bool hasScalarVFOnly() const
Definition VPlan.h:4745
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4643
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:919
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1135
void addVF(ElementCount VF)
Definition VPlan.h:4713
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4649
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1091
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4696
void setUF(unsigned UF)
Definition VPlan.h:4760
const VPSymbolicValue & getVF() const
Definition VPlan.h:4697
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:4910
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1223
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4811
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:557
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1109
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2672
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:82
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:92
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2670
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:77
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
static VPIRMetadata * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4102
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4092
static VPIRMetadata * castFailed()
Used by inherited doCastIfPossible to dyn_cast.
Definition VPlan.h:4130
static VPPhiAccessors * castFailed()
Used by inherited doCastIfPossible to dyn_cast.
Definition VPlan.h:4073
static VPPhiAccessors * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4059
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4053
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2664
Possible variants of a reduction.
Definition VPlan.h:2662
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2667
unsigned VFScaleFactor
Definition VPlan.h:2668
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:258
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2633
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2645
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2624
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:723
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:728
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:718
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:711
PHINode & getIRPhi()
Definition VPlan.h:1761
VPIRPhi(PHINode &PN)
Definition VPlan.h:1749
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1751
static bool classof(const VPUser *U)
Definition VPlan.h:1756
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1772
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:240
Type * getType() const
Returns the type of the underlying IR value.
Definition VPlan.cpp:142
static bool classof(const VPUser *U)
Definition VPlan.h:1648
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1644
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1663
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1678
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1658
static bool classof(const VPValue *V)
Definition VPlan.h:1653
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1113
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1146
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1119
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1114
static bool classof(const VPValue *V)
Definition VPlan.h:1139
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1134
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:280
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3612
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3625
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3613
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3635
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3572
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3573
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3592
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3581
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3696
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3708
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3697
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3721
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3711
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3653
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3654
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3671
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3662
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3677