LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPBuilder;
63class VPDominatorTree;
64class VPRegionBlock;
65class VPlan;
66class VPLane;
68class Value;
70
71struct VPCostContext;
72
73using VPlanPtr = std::unique_ptr<VPlan>;
74
75/// \enum UncountableExitStyle
76/// Different methods of handling early exits.
77///
80 /// No side effects to worry about, so we can process any uncountable exits
81 /// in the loop and branch either to the middle block if the trip count was
82 /// reached, or an early exit block to determine which exit was taken.
84 /// All memory operations other than the load(s) required to determine whether
85 /// an uncountable exit occurred will be masked based on that condition. If an
86 /// uncountable exit is taken, then all lanes before the exiting lane will
87 /// complete, leaving just the final lane to execute in the scalar tail.
89};
90
91/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
92/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
94 friend class VPBlockUtils;
95
96 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
97
98 /// An optional name for the block.
99 std::string Name;
100
101 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
102 /// it is a topmost VPBlockBase.
103 VPRegionBlock *Parent = nullptr;
104
105 /// List of predecessor blocks.
107
108 /// List of successor blocks.
110
111 /// VPlan containing the block. Can only be set on the entry block of the
112 /// plan.
113 VPlan *Plan = nullptr;
114
115 /// Add \p Successor as the last successor to this block.
116 void appendSuccessor(VPBlockBase *Successor) {
117 assert(Successor && "Cannot add nullptr successor!");
118 Successors.push_back(Successor);
119 }
120
121 /// Add \p Predecessor as the last predecessor to this block.
122 void appendPredecessor(VPBlockBase *Predecessor) {
123 assert(Predecessor && "Cannot add nullptr predecessor!");
124 Predecessors.push_back(Predecessor);
125 }
126
127 /// Remove \p Predecessor from the predecessors of this block.
128 void removePredecessor(VPBlockBase *Predecessor) {
129 auto Pos = find(Predecessors, Predecessor);
130 assert(Pos && "Predecessor does not exist");
131 Predecessors.erase(Pos);
132 }
133
134 /// Remove \p Successor from the successors of this block.
135 void removeSuccessor(VPBlockBase *Successor) {
136 auto Pos = find(Successors, Successor);
137 assert(Pos && "Successor does not exist");
138 Successors.erase(Pos);
139 }
140
141 /// This function replaces one predecessor with another, useful when
142 /// trying to replace an old block in the CFG with a new one.
143 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
144 auto I = find(Predecessors, Old);
145 assert(I != Predecessors.end());
146 assert(Old->getParent() == New->getParent() &&
147 "replaced predecessor must have the same parent");
148 *I = New;
149 }
150
151 /// This function replaces one successor with another, useful when
152 /// trying to replace an old block in the CFG with a new one.
153 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
154 auto I = find(Successors, Old);
155 assert(I != Successors.end());
156 assert(Old->getParent() == New->getParent() &&
157 "replaced successor must have the same parent");
158 *I = New;
159 }
160
161protected:
162 VPBlockBase(const unsigned char SC, const std::string &N)
163 : SubclassID(SC), Name(N) {}
164
165public:
166 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
167 /// that are actually instantiated. Values of this enumeration are kept in the
168 /// SubclassID field of the VPBlockBase objects. They are used for concrete
169 /// type identification.
using VPBlockTy = enum {
  VPRegionBlockSC,
  VPBasicBlockSC,
  VPIRBasicBlockSC
};
171
173
174 virtual ~VPBlockBase() = default;
175
176 const std::string &getName() const { return Name; }
177
178 void setName(const Twine &newName) { Name = newName.str(); }
179
180 /// \return an ID for the concrete type of this object.
181 /// This is used to implement the classof checks. This should not be used
182 /// for any other purpose, as the values may change as LLVM evolves.
183 unsigned getVPBlockID() const { return SubclassID; }
184
185 VPRegionBlock *getParent() { return Parent; }
186 const VPRegionBlock *getParent() const { return Parent; }
187
188 /// \return A pointer to the plan containing the current block.
189 VPlan *getPlan();
190 const VPlan *getPlan() const;
191
192 /// Sets the pointer of the plan containing the block. The block must be the
193 /// entry block into the VPlan.
194 void setPlan(VPlan *ParentPlan);
195
196 void setParent(VPRegionBlock *P) { Parent = P; }
197
198 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
199 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
200 /// VPBlockBase is a VPBasicBlock, it is returned.
201 const VPBasicBlock *getEntryBasicBlock() const;
202 VPBasicBlock *getEntryBasicBlock();
203
204 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
205 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
206 /// VPBlockBase is a VPBasicBlock, it is returned.
207 const VPBasicBlock *getExitingBasicBlock() const;
208 VPBasicBlock *getExitingBasicBlock();
209
210 const VPBlocksTy &getSuccessors() const { return Successors; }
211 VPBlocksTy &getSuccessors() { return Successors; }
212
213 /// Returns true if this block has any successors.
214 bool hasSuccessors() const { return !Successors.empty(); }
215 /// Returns true if this block has any predecessors.
216 bool hasPredecessors() const { return !Predecessors.empty(); }
217
220
221 const VPBlocksTy &getPredecessors() const { return Predecessors; }
222 VPBlocksTy &getPredecessors() { return Predecessors; }
223
224 /// \return the successor of this VPBlockBase if it has a single successor.
225 /// Otherwise return a null pointer.
227 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
228 }
229
230 /// \return the predecessor of this VPBlockBase if it has a single
231 /// predecessor. Otherwise return a null pointer.
233 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
234 }
235
236 size_t getNumSuccessors() const { return Successors.size(); }
237 size_t getNumPredecessors() const { return Predecessors.size(); }
238
239 /// An Enclosing Block of a block B is any block containing B, including B
240 /// itself. \return the closest enclosing block starting from "this", which
241 /// has successors. \return the root enclosing block if all enclosing blocks
242 /// have no successors.
243 VPBlockBase *getEnclosingBlockWithSuccessors();
244
245 /// \return the closest enclosing block starting from "this", which has
246 /// predecessors. \return the root enclosing block if all enclosing blocks
247 /// have no predecessors.
248 VPBlockBase *getEnclosingBlockWithPredecessors();
249
250 /// \return the successors either attached directly to this VPBlockBase or, if
251 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
252 /// successors of its own, search recursively for the first enclosing
253 /// VPRegionBlock that has successors and return them. If no such
254 /// VPRegionBlock exists, return the (empty) successors of the topmost
255 /// VPBlockBase reached.
257 return getEnclosingBlockWithSuccessors()->getSuccessors();
258 }
259
260 /// \return the hierarchical successor of this VPBlockBase if it has a single
261 /// hierarchical successor. Otherwise return a null pointer.
263 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
264 }
265
266 /// \return the predecessors either attached directly to this VPBlockBase or,
267 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
268 /// predecessors of its own, search recursively for the first enclosing
269 /// VPRegionBlock that has predecessors and return them. If no such
270 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
271 /// VPBlockBase reached.
273 return getEnclosingBlockWithPredecessors()->getPredecessors();
274 }
275
276 /// \return the hierarchical predecessor of this VPBlockBase if it has a
277 /// single hierarchical predecessor. Otherwise return a null pointer.
281
282 /// Set a given VPBlockBase \p Successor as the single successor of this
283 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
284 /// This VPBlockBase must have no successors.
286 assert(Successors.empty() && "Setting one successor when others exist.");
287 assert(Successor->getParent() == getParent() &&
288 "connected blocks must have the same parent");
289 appendSuccessor(Successor);
290 }
291
292 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
293 /// successors of this VPBlockBase. This VPBlockBase is not added as
294 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
295 /// successors.
296 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
297 assert(Successors.empty() && "Setting two successors when others exist.");
298 appendSuccessor(IfTrue);
299 appendSuccessor(IfFalse);
300 }
301
302 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
303 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
304 /// as successor of any VPBasicBlock in \p NewPreds.
306 assert(Predecessors.empty() && "Block predecessors already set.");
307 for (auto *Pred : NewPreds)
308 appendPredecessor(Pred);
309 }
310
311 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
312 /// This VPBlockBase must have no successors. This VPBlockBase is not added
313 /// as predecessor of any VPBasicBlock in \p NewSuccs.
315 assert(Successors.empty() && "Block successors already set.");
316 for (auto *Succ : NewSuccs)
317 appendSuccessor(Succ);
318 }
319
320 /// Remove all the predecessors of this block.
321 void clearPredecessors() { Predecessors.clear(); }
322
323 /// Remove all the successors of this block.
324 void clearSuccessors() { Successors.clear(); }
325
326 /// Swap predecessors of the block. The block must have exactly 2
327 /// predecessors.
329 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
330 std::swap(Predecessors[0], Predecessors[1]);
331 }
332
333 /// Swap successors of the block. The block must have exactly 2 successors.
334 // TODO: This should be part of introducing conditional branch recipes rather
335 // than being independent.
337 assert(Successors.size() == 2 && "must have 2 successors to swap");
338 std::swap(Successors[0], Successors[1]);
339 }
340
341 /// Returns the index for \p Pred in the block's predecessor list.
342 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
343 assert(count(Predecessors, Pred) == 1 &&
344 "must have Pred exactly once in Predecessors");
345 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
346 }
347
348 /// Returns the index for \p Succ in the block's successor list.
349 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
350 assert(count(Successors, Succ) == 1 &&
351 "must have Succ exactly once in Successors");
352 return std::distance(Successors.begin(), find(Successors, Succ));
353 }
354
355 /// The method which generates the output IR that correspond to this
356 /// VPBlockBase, thereby "executing" the VPlan.
357 virtual void execute(VPTransformState *State) = 0;
358
359 /// Return the cost of the block.
361
362#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
363 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
364 OS << getName();
365 }
366
367 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
368 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
369 /// consecutive numbers.
370 ///
371 /// Note that the numbering is applied to the whole VPlan, so printing
372 /// individual blocks is consistent with the whole VPlan printing.
373 virtual void print(raw_ostream &O, const Twine &Indent,
374 VPSlotTracker &SlotTracker) const = 0;
375
376 /// Print plain-text dump of this VPBlockBase to \p O.
377 void print(raw_ostream &O) const;
378
379 /// Print the successors of this block to \p O, prefixing all lines with \p
380 /// Indent.
381 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
382
383 /// Dump this VPBlockBase to dbgs().
384 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
385#endif
386
387 /// Clone the current block and its recipes without updating the operands of
388 /// the cloned recipes, including all blocks in the single-entry single-exit
389 /// region for VPRegionBlocks.
390 virtual VPBlockBase *clone() = 0;
391};
392
393/// VPRecipeBase is a base class modeling a sequence of one or more output IR
394/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
395/// and is responsible for deleting its defined values. Single-value
396 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
397/// VPRecipeBase and VPValue separately.
399 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
400 public VPDef,
401 public VPUser {
402 friend VPBasicBlock;
403 friend class VPBlockUtils;
404
405 /// Subclass identifier (for isa/dyn_cast).
406 const unsigned char SubclassID;
407
408 /// Each VPRecipe belongs to a single VPBasicBlock.
409 VPBasicBlock *Parent = nullptr;
410
411 /// The debug location for the recipe.
412 DebugLoc DL;
413
414public:
415 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
416 /// that is actually instantiated. Values of this enumeration are kept in the
417 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
418 /// type identification.
using VPRecipeTy = enum {
  // Non-phi recipes.
  VPBranchOnMaskSC,
  VPDerivedIVSC,
  VPExpandSCEVSC,
  VPExpressionSC,
  VPIRInstructionSC,
  VPInstructionSC,
  VPInterleaveEVLSC,
  VPInterleaveSC,
  VPReductionEVLSC,
  VPReductionSC,
  VPReplicateSC,
  VPScalarIVStepsSC,
  VPVectorPointerSC,
  VPVectorEndPointerSC,
  VPWidenCallSC,
  VPWidenCanonicalIVSC,
  VPWidenCastSC,
  VPWidenGEPSC,
  VPWidenIntrinsicSC,
  VPWidenMemIntrinsicSC,
  VPWidenLoadEVLSC,
  VPWidenLoadSC,
  VPWidenStoreEVLSC,
  VPWidenStoreSC,
  VPWidenSC,
  VPBlendSC,
  VPHistogramSC,

  // START: Phi-like recipes. Need to be kept together.
  VPWidenPHISC,
  VPPredInstPHISC,

  // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
  // VPHeaderPHIRecipe need to be kept together.
  VPCurrentIterationPHISC,
  VPActiveLaneMaskPHISC,
  VPFirstOrderRecurrencePHISC,
  VPWidenIntOrFpInductionSC,
  VPWidenPointerInductionSC,
  VPReductionPHISC,
  // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
  // END: Phi-like recipes

  // Range markers: aliases for the first/last enumerator of each contiguous
  // group above.
  VPFirstPHISC = VPWidenPHISC,
  VPFirstHeaderPHISC = VPCurrentIterationPHISC,
  VPLastHeaderPHISC = VPReductionPHISC,
  VPLastPHISC = VPReductionPHISC,
};
465
466 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
468 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
469
470 ~VPRecipeBase() override = default;
471
472 /// Clone the current recipe.
473 virtual VPRecipeBase *clone() = 0;
474
475 /// \return the VPBasicBlock which this VPRecipe belongs to.
476 VPBasicBlock *getParent() { return Parent; }
477 const VPBasicBlock *getParent() const { return Parent; }
478
479 /// \return the VPRegionBlock which the recipe belongs to.
480 VPRegionBlock *getRegion();
481 const VPRegionBlock *getRegion() const;
482
483 /// The method which generates the output IR instructions that correspond to
484 /// this VPRecipe, thereby "executing" the VPlan.
485 virtual void execute(VPTransformState &State) = 0;
486
487 /// Return the cost of this recipe, taking into account if the cost
488 /// computation should be skipped and the ForceTargetInstructionCost flag.
489 /// Also takes care of printing the cost for debugging.
491
492 /// Insert an unlinked recipe into a basic block immediately before
493 /// the specified recipe.
494 void insertBefore(VPRecipeBase *InsertPos);
495 /// Insert an unlinked recipe into \p BB immediately before the insertion
496 /// point \p IP;
497 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
498
499 /// Insert an unlinked Recipe into a basic block immediately after
500 /// the specified Recipe.
501 void insertAfter(VPRecipeBase *InsertPos);
502
503 /// Unlink this recipe from its current VPBasicBlock and insert it into
504 /// the VPBasicBlock that MovePos lives in, right after MovePos.
505 void moveAfter(VPRecipeBase *MovePos);
506
507 /// Unlink this recipe and insert into BB before I.
508 ///
509 /// \pre I is a valid iterator into BB.
510 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
511
512 /// This method unlinks 'this' from the containing basic block, but does not
513 /// delete it.
514 void removeFromParent();
515
516 /// This method unlinks 'this' from the containing basic block and deletes it.
517 ///
518 /// \returns an iterator pointing to the element after the erased one
520
521 /// \return an ID for the concrete type of this object.
522 unsigned getVPRecipeID() const { return SubclassID; }
523
524 /// Method to support type inquiry through isa, cast, and dyn_cast.
525 static inline bool classof(const VPDef *D) {
526 // All VPDefs are also VPRecipeBases.
527 return true;
528 }
529
530 static inline bool classof(const VPUser *U) { return true; }
531
532 /// Returns true if the recipe may have side-effects.
533 bool mayHaveSideEffects() const;
534
535 /// Return true if we can safely execute this recipe unconditionally even if
536 /// it is masked originally.
537 bool isSafeToSpeculativelyExecute() const;
538
539 /// Returns true for PHI-like recipes.
540 bool isPhi() const;
541
542 /// Returns true if the recipe may read from memory.
543 bool mayReadFromMemory() const;
544
545 /// Returns true if the recipe may write to memory.
546 bool mayWriteToMemory() const;
547
548 /// Returns true if the recipe may read from or write to memory.
549 bool mayReadOrWriteMemory() const {
551 }
552
553 /// Returns the debug location of the recipe.
554 DebugLoc getDebugLoc() const { return DL; }
555
556 /// Set the recipe's debug location to \p NewDL.
557 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
558
559#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
560 /// Dump the recipe to stderr (for debugging).
561 LLVM_ABI_FOR_TEST void dump() const;
562
563 /// Print the recipe, delegating to printRecipe().
564 void print(raw_ostream &O, const Twine &Indent,
566#endif
567
568protected:
569 /// Compute the cost of this recipe either using a recipe's specialized
570 /// implementation or using the legacy cost model and the underlying
571 /// instructions.
572 virtual InstructionCost computeCost(ElementCount VF,
573 VPCostContext &Ctx) const;
574
575#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
576 /// Each concrete VPRecipe prints itself, without printing common information,
577 /// like debug info or metadata.
578 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
579 VPSlotTracker &SlotTracker) const = 0;
580#endif
581};
582
583// Helper macro to define common classof implementations for recipes.
// Expands to the four classof overloads (VPRecipeBase, VPValue, VPUser,
// VPSingleDefRecipe) for a recipe class whose subclass tag is VPRecipeID.
// The VPValue and VPUser overloads return false when no recipe is found.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *Def = V->getDefiningRecipe())                                    \
      return Def->getVPRecipeID() == VPRecipeID;                               \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *AsRecipe = dyn_cast<VPRecipeBase>(U))                            \
      return AsRecipe->getVPRecipeID() == VPRecipeID;                          \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
599
600/// Return the scalar type of \p V. If \p V's scalar type has not been set
601/// because the defining recipe was not assigned one yet, fall back to
602/// VPTypeAnalysis using the plan of the defining recipe.
603/// TODO: Remove once all VPRecipeValues have been migrated to carry their
604/// types.
605LLVM_ABI Type *getScalarTypeOrInfer(VPValue *V);
606
607/// VPSingleDefRecipe is a base class for recipes that model a sequence of one
608/// or more output IR that define a single result VPValue. Note that
609/// VPSingleDefRecipe must inherit from VPRecipeBase before VPSingleDefValue.
611public:
612 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
614 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this) {}
615
616 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
618 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV) {}
619
620 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
621 Type *ResultTy, Value *UV = nullptr,
623 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV, ResultTy) {}
624
625 static inline bool classof(const VPRecipeBase *R) {
626 switch (R->getVPRecipeID()) {
627 case VPRecipeBase::VPDerivedIVSC:
628 case VPRecipeBase::VPExpandSCEVSC:
629 case VPRecipeBase::VPExpressionSC:
630 case VPRecipeBase::VPInstructionSC:
631 case VPRecipeBase::VPReductionEVLSC:
632 case VPRecipeBase::VPReductionSC:
633 case VPRecipeBase::VPReplicateSC:
634 case VPRecipeBase::VPScalarIVStepsSC:
635 case VPRecipeBase::VPVectorPointerSC:
636 case VPRecipeBase::VPVectorEndPointerSC:
637 case VPRecipeBase::VPWidenCallSC:
638 case VPRecipeBase::VPWidenCanonicalIVSC:
639 case VPRecipeBase::VPWidenCastSC:
640 case VPRecipeBase::VPWidenGEPSC:
641 case VPRecipeBase::VPWidenIntrinsicSC:
642 case VPRecipeBase::VPWidenMemIntrinsicSC:
643 case VPRecipeBase::VPWidenSC:
644 case VPRecipeBase::VPBlendSC:
645 case VPRecipeBase::VPPredInstPHISC:
646 case VPRecipeBase::VPCurrentIterationPHISC:
647 case VPRecipeBase::VPActiveLaneMaskPHISC:
648 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
649 case VPRecipeBase::VPWidenPHISC:
650 case VPRecipeBase::VPWidenIntOrFpInductionSC:
651 case VPRecipeBase::VPWidenPointerInductionSC:
652 case VPRecipeBase::VPReductionPHISC:
653 case VPRecipeBase::VPWidenLoadEVLSC:
654 case VPRecipeBase::VPWidenLoadSC:
655 return true;
656 case VPRecipeBase::VPBranchOnMaskSC:
657 case VPRecipeBase::VPInterleaveEVLSC:
658 case VPRecipeBase::VPInterleaveSC:
659 case VPRecipeBase::VPIRInstructionSC:
660 case VPRecipeBase::VPWidenStoreEVLSC:
661 case VPRecipeBase::VPWidenStoreSC:
662 case VPRecipeBase::VPHistogramSC:
663 return false;
664 }
665 llvm_unreachable("Unhandled VPRecipeID");
666 }
667
668 static inline bool classof(const VPValue *V) {
669 auto *R = V->getDefiningRecipe();
670 return R && classof(R);
671 }
672
673 static inline bool classof(const VPUser *U) {
674 auto *R = dyn_cast<VPRecipeBase>(U);
675 return R && classof(R);
676 }
677
678 VPSingleDefRecipe *clone() override = 0;
679
680 /// Returns the underlying instruction.
687
688#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
689 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
691#endif
692};
693
694/// Class to record and manage LLVM IR flags.
/// Tag selecting which kind of flags a VPIRFlags object currently holds.
enum class OperationType : unsigned char {
  Cmp,
  FCmp,
  OverflowingBinOp,
  Trunc,
  DisjointOp,
  PossiblyExactOp,
  GEPOp,
  FPMathOp,
  NonNegOp,
  ReductionOp,
  Other
};
710
711public:
712 struct WrapFlagsTy {
713 char HasNUW : 1;
714 char HasNSW : 1;
715
717 };
718
720 char HasNUW : 1;
721 char HasNSW : 1;
722
724 };
725
730
732 char NonNeg : 1;
733 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
734 };
735
736private:
/// Single-bit storage for the "exact" flag.
struct ExactFlagsTy {
  char IsExact : 1;

  /// Implicitly constructible from a bool so a plain flag value can be
  /// assigned directly.
  ExactFlagsTy(bool Exact) : IsExact(Exact) {}
};
741 struct FastMathFlagsTy {
742 char AllowReassoc : 1;
743 char NoNaNs : 1;
744 char NoInfs : 1;
745 char NoSignedZeros : 1;
746 char AllowReciprocal : 1;
747 char AllowContract : 1;
748 char ApproxFunc : 1;
749
750 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
751 };
752 /// Holds both the predicate and fast-math flags for floating-point
753 /// comparisons.
754 struct FCmpFlagsTy {
755 uint8_t CmpPredStorage;
756 FastMathFlagsTy FMFs;
757 };
758 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
759 struct ReductionFlagsTy {
760 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
761 // additional kinds.
762 unsigned char Kind : 6;
763 // TODO: Derive order/in-loop from plan and remove here.
764 unsigned char IsOrdered : 1;
765 unsigned char IsInLoop : 1;
766 FastMathFlagsTy FMFs;
767
768 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
769 FastMathFlags FMFs)
770 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
771 IsInLoop(IsInLoop), FMFs(FMFs) {}
772 };
773
774 OperationType OpType;
775
776 union {
781 ExactFlagsTy ExactFlags;
784 FastMathFlagsTy FMFs;
785 FCmpFlagsTy FCmpFlags;
786 ReductionFlagsTy ReductionFlags;
788 };
789
790public:
791 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
792
794 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
795 OpType = OperationType::FCmp;
797 FCmp->getPredicate());
798 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
799 FCmpFlags.FMFs = FCmp->getFastMathFlags();
800 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
801 OpType = OperationType::Cmp;
803 Op->getPredicate());
804 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
805 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
806 OpType = OperationType::DisjointOp;
807 DisjointFlags.IsDisjoint = Op->isDisjoint();
808 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
809 OpType = OperationType::OverflowingBinOp;
810 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
811 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
812 OpType = OperationType::Trunc;
813 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
814 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
815 OpType = OperationType::PossiblyExactOp;
816 ExactFlags.IsExact = Op->isExact();
817 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
818 OpType = OperationType::GEPOp;
819 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
820 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
821 "wrap flags truncated");
822 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
823 OpType = OperationType::NonNegOp;
824 NonNegFlags.NonNeg = PNNI->hasNonNeg();
825 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
826 OpType = OperationType::FPMathOp;
827 FMFs = Op->getFastMathFlags();
828 }
829 }
830
831 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
833 assert(getPredicate() == Pred && "predicate truncated");
834 }
835
837 : OpType(OperationType::FCmp), AllFlags() {
839 assert(getPredicate() == Pred && "predicate truncated");
840 FCmpFlags.FMFs = FMFs;
841 }
842
844 : OpType(OperationType::OverflowingBinOp), AllFlags() {
845 this->WrapFlags = WrapFlags;
846 }
847
849 : OpType(OperationType::Trunc), AllFlags() {
850 this->TruncFlags = TruncFlags;
851 }
852
853 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
854 this->FMFs = FMFs;
855 }
856
858 : OpType(OperationType::DisjointOp), AllFlags() {
859 this->DisjointFlags = DisjointFlags;
860 }
861
863 : OpType(OperationType::NonNegOp), AllFlags() {
864 this->NonNegFlags = NonNegFlags;
865 }
866
867 VPIRFlags(ExactFlagsTy ExactFlags)
868 : OpType(OperationType::PossiblyExactOp), AllFlags() {
869 this->ExactFlags = ExactFlags;
870 }
871
873 : OpType(OperationType::GEPOp), AllFlags() {
874 GEPFlagsStorage = GEPFlags.getRaw();
875 }
876
877 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
878 : OpType(OperationType::ReductionOp), AllFlags() {
879 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
880 }
881
883 OpType = Other.OpType;
884 AllFlags[0] = Other.AllFlags[0];
885 AllFlags[1] = Other.AllFlags[1];
886 }
887
888 /// Only keep flags also present in \p Other. \p Other must have the same
889 /// OpType as the current object.
890 void intersectFlags(const VPIRFlags &Other);
891
892 /// Drop all poison-generating flags.
894 // NOTE: This needs to be kept in-sync with
895 // Instruction::dropPoisonGeneratingFlags.
896 switch (OpType) {
897 case OperationType::OverflowingBinOp:
898 WrapFlags.HasNUW = false;
899 WrapFlags.HasNSW = false;
900 break;
901 case OperationType::Trunc:
902 TruncFlags.HasNUW = false;
903 TruncFlags.HasNSW = false;
904 break;
905 case OperationType::DisjointOp:
906 DisjointFlags.IsDisjoint = false;
907 break;
908 case OperationType::PossiblyExactOp:
909 ExactFlags.IsExact = false;
910 break;
911 case OperationType::GEPOp:
912 GEPFlagsStorage = 0;
913 break;
914 case OperationType::FPMathOp:
915 case OperationType::FCmp:
916 case OperationType::ReductionOp:
917 getFMFsRef().NoNaNs = false;
918 getFMFsRef().NoInfs = false;
919 break;
920 case OperationType::NonNegOp:
921 NonNegFlags.NonNeg = false;
922 break;
923 case OperationType::Cmp:
924 case OperationType::Other:
925 break;
926 }
927 }
928
929 /// Apply the IR flags to \p I.
930 void applyFlags(Instruction &I) const {
931 switch (OpType) {
932 case OperationType::OverflowingBinOp:
933 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
934 I.setHasNoSignedWrap(WrapFlags.HasNSW);
935 break;
936 case OperationType::Trunc:
937 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
938 I.setHasNoSignedWrap(TruncFlags.HasNSW);
939 break;
940 case OperationType::DisjointOp:
941 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
942 break;
943 case OperationType::PossiblyExactOp:
944 I.setIsExact(ExactFlags.IsExact);
945 break;
946 case OperationType::GEPOp:
947 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
949 break;
950 case OperationType::FPMathOp:
951 case OperationType::FCmp: {
952 const FastMathFlagsTy &F = getFMFsRef();
953 I.setHasAllowReassoc(F.AllowReassoc);
954 I.setHasNoNaNs(F.NoNaNs);
955 I.setHasNoInfs(F.NoInfs);
956 I.setHasNoSignedZeros(F.NoSignedZeros);
957 I.setHasAllowReciprocal(F.AllowReciprocal);
958 I.setHasAllowContract(F.AllowContract);
959 I.setHasApproxFunc(F.ApproxFunc);
960 break;
961 }
962 case OperationType::NonNegOp:
963 I.setNonNeg(NonNegFlags.NonNeg);
964 break;
965 case OperationType::ReductionOp:
966 llvm_unreachable("reduction ops should not use applyFlags");
967 case OperationType::Cmp:
968 case OperationType::Other:
969 break;
970 }
971 }
972
974 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
975 "recipe doesn't have a compare predicate");
976 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
979 }
980
982 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
983 "recipe doesn't have a compare predicate");
984 if (OpType == OperationType::FCmp)
986 else
988 assert(getPredicate() == Pred && "predicate truncated");
989 }
990
994
995 /// Returns true if the recipe has a comparison predicate.
996 bool hasPredicate() const {
997 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
998 }
999
1000 /// Returns true if the recipe has fast-math flags.
1001 bool hasFastMathFlags() const {
1002 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
1003 OpType == OperationType::ReductionOp;
1004 }
1005
1007
1008 /// Returns true if the recipe has non-negative flag.
1009 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1010
1011 bool isNonNeg() const {
1012 assert(OpType == OperationType::NonNegOp &&
1013 "recipe doesn't have a NNEG flag");
1014 return NonNegFlags.NonNeg;
1015 }
1016
1017 bool hasNoUnsignedWrap() const {
1018 switch (OpType) {
1019 case OperationType::OverflowingBinOp:
1020 return WrapFlags.HasNUW;
1021 case OperationType::Trunc:
1022 return TruncFlags.HasNUW;
1023 default:
1024 llvm_unreachable("recipe doesn't have a NUW flag");
1025 }
1026 }
1027
1028 bool hasNoSignedWrap() const {
1029 switch (OpType) {
1030 case OperationType::OverflowingBinOp:
1031 return WrapFlags.HasNSW;
1032 case OperationType::Trunc:
1033 return TruncFlags.HasNSW;
1034 default:
1035 llvm_unreachable("recipe doesn't have a NSW flag");
1036 }
1037 }
1038
1039 bool hasNoWrapFlags() const {
1040 switch (OpType) {
1041 case OperationType::OverflowingBinOp:
1042 case OperationType::Trunc:
1043 return true;
1044 default:
1045 return false;
1046 }
1047 }
1048
1050 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1051 }
1052
1053 bool isDisjoint() const {
1054 assert(OpType == OperationType::DisjointOp &&
1055 "recipe cannot have a disjoing flag");
1056 return DisjointFlags.IsDisjoint;
1057 }
1058
1060 assert(OpType == OperationType::ReductionOp &&
1061 "recipe doesn't have reduction flags");
1062 return static_cast<RecurKind>(ReductionFlags.Kind);
1063 }
1064
1065 bool isReductionOrdered() const {
1066 assert(OpType == OperationType::ReductionOp &&
1067 "recipe doesn't have reduction flags");
1068 return ReductionFlags.IsOrdered;
1069 }
1070
1071 bool isReductionInLoop() const {
1072 assert(OpType == OperationType::ReductionOp &&
1073 "recipe doesn't have reduction flags");
1074 return ReductionFlags.IsInLoop;
1075 }
1076
1077private:
1078 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1079 FastMathFlagsTy &getFMFsRef() {
1080 if (OpType == OperationType::FCmp)
1081 return FCmpFlags.FMFs;
1082 if (OpType == OperationType::ReductionOp)
1083 return ReductionFlags.FMFs;
1084 return FMFs;
1085 }
1086 const FastMathFlagsTy &getFMFsRef() const {
1087 if (OpType == OperationType::FCmp)
1088 return FCmpFlags.FMFs;
1089 if (OpType == OperationType::ReductionOp)
1090 return ReductionFlags.FMFs;
1091 return FMFs;
1092 }
1093
1094public:
1095 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1096 /// otherwise. Opcodes not supporting default flags include compares and
1097 /// ComputeReductionResult.
1098 static VPIRFlags getDefaultFlags(unsigned Opcode);
1099
1100#if !defined(NDEBUG)
1101 /// Returns true if the set flags are valid for \p Opcode.
1102 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1103
1104 /// Returns true if \p Opcode has its required flags set.
1105 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1106#endif
1107
1108#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1109 void printFlags(raw_ostream &O) const;
1110#endif
1111};
1113
1114static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1115
1116/// A pure-virtual common base class for recipes defining a single VPValue and
1117/// using IR flags.
1119 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1120 const VPIRFlags &Flags,
1122 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1123
1124 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1125 Type *ResultTy, const VPIRFlags &Flags,
1127 : VPSingleDefRecipe(SC, Operands, ResultTy, /*UV=*/nullptr, DL),
1128 VPIRFlags(Flags) {}
1129
1130 static inline bool classof(const VPRecipeBase *R) {
1131 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1132 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1133 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1134 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1135 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1136 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1137 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1138 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC ||
1139 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1140 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1141 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1142 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1143 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC ||
1144 R->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1145 }
1146
1147 static inline bool classof(const VPUser *U) {
1148 auto *R = dyn_cast<VPRecipeBase>(U);
1149 return R && classof(R);
1150 }
1151
1152 static inline bool classof(const VPValue *V) {
1153 auto *R = V->getDefiningRecipe();
1154 return R && classof(R);
1155 }
1156
1158
1159 static inline bool classof(const VPSingleDefRecipe *R) {
1160 return classof(static_cast<const VPRecipeBase *>(R));
1161 }
1162
1163 void execute(VPTransformState &State) override = 0;
1164
1165 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1167 VPCostContext &Ctx) const;
1168};
1169
1170/// Helper to manage IR metadata for recipes. It filters out metadata that
1171/// cannot be propagated.
1174
1175public:
1176 VPIRMetadata() = default;
1177
1178 /// Adds metadata that can be preserved from the original instruction
1179 /// \p I.
1181
1182 /// Copy constructor for cloning.
1184
1186
1187 /// Add all metadata to \p I.
1188 void applyMetadata(Instruction &I) const;
1189
1190 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1191 /// already exists, it will be replaced. Otherwise, it will be added.
1192 void setMetadata(unsigned Kind, MDNode *Node) {
1193 auto It =
1194 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1195 return P.first == Kind;
1196 });
1197 if (It != Metadata.end())
1198 It->second = Node;
1199 else
1200 Metadata.emplace_back(Kind, Node);
1201 }
1202
1203 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1204 /// nodes that are common to both.
1205 void intersect(const VPIRMetadata &MD);
1206
1207 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1208 MDNode *getMetadata(unsigned Kind) const {
1209 auto It =
1210 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1211 return It != Metadata.end() ? It->second : nullptr;
1212 }
1213
1214#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1215 /// Print metadata with node IDs.
1216 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1217#endif
1218};
1219
1220/// This is a concrete Recipe that models a single VPlan-level instruction.
1221/// While as any Recipe it may generate a sequence of IR instructions when
1222/// executed, these instructions would always form a single-def expression as
1223/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1224/// opcodes can take an optional mask. Masks may be assigned during
1225/// predication.
1227 public VPIRMetadata {
1228public:
1229 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1230 enum {
1232 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1233 // values of a first-order recurrence.
1235 // Creates a mask where each lane is active (true) whilst the current
1236 // counter (first operand + index) is less than the second operand. i.e.
1237 // mask[i] = icmp ult (op0 + i), op1
1238 // The size of the mask returned is VF * Multiplier (UF, third op).
1242 // Increment the canonical IV separately for each unrolled part.
1244 // Abstract instruction that compares two values and branches. This is
1245 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1248 // Branch with 2 boolean condition operands and 3 successors. If condition
1249 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1250 // successor 1; otherwise branches to successor 2. Expanded after region
1251 // dissolution into: (1) an OR of the two conditions branching to
1252 // middle.split or successor 2, and (2) middle.split branching to successor
1253 // 0 or successor 1 based on condition 0.
1256 /// Given operands of (the same) struct type, creates a struct of fixed-
1257 /// width vectors each containing a struct field of all operands. The
1258 /// number of operands matches the element count of every vector.
1260 /// Creates a fixed-width vector containing all operands. The number of
1261 /// operands matches the vector element count.
1263 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1264 /// abstract VPInstruction whose single defined VPValue represents VF
1265 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1266 /// VPInstructions.
1268 /// Reduce the operands to the final reduction result using the operation
1269 /// specified via the operation's VPIRFlags.
1271 // Extracts the last part of its operand. Removed during unrolling.
1273 // Extracts the last lane of its vector operand, per part.
1275 // Extracts the second-to-last lane from its operand or the second-to-last
1276 // part if it is scalar. In the latter case, the recipe will be removed
1277 // during unrolling.
1279 LogicalAnd, // Non-poison propagating logical And.
1280 LogicalOr, // Non-poison propagating logical Or.
1281 // Add an offset in bytes (second operand) to a base pointer (first
1282 // operand). Only generates scalar values (either for the first lane only or
1283 // for all lanes, depending on its uses).
1285 // Add a vector offset in bytes (second operand) to a scalar base pointer
1286 // (first operand).
1288 // Returns a scalar boolean value, which is true if any lane of its
1289 // (boolean) vector operands is true. It produces the reduced value across
1290 // all unrolled iterations. Unrolling will add all copies of its original
1291 // operand as additional operands. AnyOf is poison-safe as all operands
1292 // will be frozen.
1294 // Calculates the first active lane index of the vector predicate operands.
1295 // It produces the lane index across all unrolled iterations. Unrolling will
1296 // add all copies of its original operand as additional operands.
1297 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1298 // result even with operands that are all zeroes.
1300 // Calculates the last active lane index of the vector predicate operands.
1301 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1302 // tail-folding to extract the correct live-out value from the last active
1303 // iteration. It produces the lane index across all unrolled iterations.
1304 // Unrolling will add all copies of its original operand as additional
1305 // operands.
1307 // Returns a reversed vector for the operand.
1309 /// Start vector for reductions with 3 operands: the original start value,
1310 /// the identity value for the reduction and an integer indicating the
1311 /// scaling factor.
1313 /// Extracts a single lane (first operand) from a set of vector operands.
1314 /// The lane specifies an index into a vector formed by combining all vector
1315 /// operands (all operands after the first one).
1317 /// Explicit user for the resume phi of the canonical induction in the main
1318 /// VPlan, used by the epilogue vector loop.
1320 /// Extracts the last active lane from a set of vectors. The first operand
1321 /// is the default value if no lanes in the masks are active. Conceptually,
1322 /// this concatenates all data vectors (odd operands), concatenates all
1323 /// masks (even operands -- ignoring the default value), and returns the
1324 /// last active value from the combined data vector using the combined mask.
1326 /// Compute the exiting value of a wide induction after vectorization, that
1327 /// is the value of the last lane of the induction increment (i.e. its
1328 /// backedge value). Has the wide induction recipe as operand.
1331
1332 // The opcodes below are used for VPInstructionWithType.
1333 // NOTE: VPInstructionWithType classes are also used for:
1334 // 1. All CastInst variants - see createVPInstructionsForVPBB, and other
1335 // cases where createScalarCast, createScalarZExtOrTrunc and
1336 // createScalarSExtOrTrunc are invoked.
1337 // 2. Scalar load instructions - see createVPInstructionsForVPBB.
1338
1339 /// Scale the first operand (vector step) by the second operand
1340 /// (scalar-step). Casts both operands to the result type if needed.
1342 // Creates a step vector starting from 0 to VF with a step of 1.
1344 /// Returns the value for vscale.
1346
1348 };
1349
1350 /// Returns true if this VPInstruction generates scalar values for all lanes.
1351 /// Most VPInstructions generate a single value per part, either vector or
1352 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1353 /// values per all lanes, stemming from an original ingredient. This method
1354 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1355 /// underlying ingredient.
1356 bool doesGeneratePerAllLanes() const;
1357
1358 /// Return the number of operands determined by the opcode of the
1359 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1360 /// cannot be determined directly by the opcode.
1361 unsigned getNumOperandsForOpcode() const;
1362
1363private:
1364 typedef unsigned char OpcodeTy;
1365 OpcodeTy Opcode;
1366
1367 /// An optional name that can be used for the generated IR instruction.
1368 std::string Name;
1369
1370 /// Returns true if we can generate a scalar for the first lane only if
1371 /// needed.
1372 bool canGenerateScalarForFirstLane() const;
1373
1374 /// Utility methods serving execute(): generates a single vector instance of
1375 /// the modeled instruction. \returns the generated value. In some cases an
1376 /// existing value is returned rather than a generated one.
1377 Value *generate(VPTransformState &State);
1378
1379 /// Returns true if the VPInstruction does not need masking.
1380 bool alwaysUnmasked() const {
1381 if (Opcode == VPInstruction::MaskedCond)
1382 return false;
1383
1384 // For now only VPInstructions with underlying values use masks.
1385 // TODO: provide masks to VPInstructions w/o underlying values.
1386 if (!getUnderlyingValue())
1387 return true;
1388
1389 return Instruction::isCast(Opcode) || Opcode == Instruction::PHI ||
1390 Opcode == Instruction::GetElementPtr;
1391 }
1392
1393public:
1394 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1395 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1396 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "");
1397
1398 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1399
1401
1403 auto *New = new VPInstruction(Opcode, NewOperands, *this, *this,
1404 getDebugLoc(), Name);
1405 if (getUnderlyingValue())
1406 New->setUnderlyingValue(getUnderlyingInstr());
1407 return New;
1408 }
1409
1410 unsigned getOpcode() const { return Opcode; }
1411
1412 /// Generate the instruction.
1413 /// TODO: We currently execute only per-part unless a specific instance is
1414 /// provided.
1415 void execute(VPTransformState &State) override;
1416
1417 /// Return the cost of this VPInstruction.
1418 InstructionCost computeCost(ElementCount VF,
1419 VPCostContext &Ctx) const override;
1420
1421#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1422 /// Print the VPInstruction to dbgs() (for debugging).
1423 LLVM_DUMP_METHOD void dump() const;
1424#endif
1425
1426 bool hasResult() const {
1427 // CallInst may or may not have a result, depending on the called function.
1428 // Conservatively return calls have results for now.
1429 switch (getOpcode()) {
1430 case Instruction::Ret:
1431 case Instruction::UncondBr:
1432 case Instruction::CondBr:
1433 case Instruction::Store:
1434 case Instruction::Switch:
1435 case Instruction::IndirectBr:
1436 case Instruction::Resume:
1437 case Instruction::CatchRet:
1438 case Instruction::Unreachable:
1439 case Instruction::Fence:
1440 case Instruction::AtomicRMW:
1444 return false;
1445 default:
1446 return true;
1447 }
1448 }
1449
1450 /// Returns true if the VPInstruction has a mask operand.
1451 bool isMasked() const {
1452 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1453 // VPInstructions without a fixed number of operands cannot be masked.
1454 if (NumOpsForOpcode == -1u)
1455 return false;
1456 return NumOpsForOpcode + 1 == getNumOperands();
1457 }
1458
1459 /// Returns the number of operands, excluding the mask if the VPInstruction is
1460 /// masked.
1461 unsigned getNumOperandsWithoutMask() const {
1462 return getNumOperands() - isMasked();
1463 }
1464
1465 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1466 void addMask(VPValue *Mask) {
1467 assert(!isMasked() && "recipe is already masked");
1468 if (alwaysUnmasked())
1469 return;
1470 addOperand(Mask);
1471 }
1472
1473 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1474 /// VPInstructions.
1475 VPValue *getMask() const {
1476 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1477 }
1478
1479 /// Returns an iterator range over the operands excluding the mask operand
1480 /// if present.
1487
1488 /// Returns true if the underlying opcode may read from or write to memory.
1489 bool opcodeMayReadOrWriteFromMemory() const;
1490
1491 /// Returns true if the recipe only uses the first lane of operand \p Op.
1492 bool usesFirstLaneOnly(const VPValue *Op) const override;
1493
1494 /// Returns true if the recipe only uses the first part of operand \p Op.
1495 bool usesFirstPartOnly(const VPValue *Op) const override;
1496
1497 /// Returns true if this VPInstruction produces a scalar value from a vector,
1498 /// e.g. by performing a reduction or extracting a lane.
1499 bool isVectorToScalar() const;
1500
1501 /// Returns true if this VPInstruction's operands are single scalars and the
1502 /// result is also a single scalar.
1503 bool isSingleScalar() const;
1504
1505 /// Returns the symbolic name assigned to the VPInstruction.
1506 StringRef getName() const { return Name; }
1507
1508 /// Set the symbolic name for the VPInstruction.
1509 void setName(StringRef NewName) { Name = NewName.str(); }
1510
1511protected:
1512#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1513 /// Print the VPInstruction to \p O.
1514 void printRecipe(raw_ostream &O, const Twine &Indent,
1515 VPSlotTracker &SlotTracker) const override;
1516#endif
1517};
1518
1519/// A specialization of VPInstruction augmenting it with a dedicated result
1520/// type, to be used when the opcode and operands of the VPInstruction don't
1521/// directly determine the result type. Note that there is no separate recipe ID
1522/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1523/// distinguished purely by the opcode.
1525 /// Scalar result type produced by the recipe.
1526 Type *ResultTy;
1527
1528public:
1530 Type *ResultTy, const VPIRFlags &Flags = {},
1531 const VPIRMetadata &Metadata = {},
1533 const Twine &Name = "")
1534 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name),
1535 ResultTy(ResultTy) {}
1536
1537 static inline bool classof(const VPRecipeBase *R) {
1538 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1539 // type information.
1540 auto *VPI = dyn_cast<VPInstruction>(R);
1541 if (!VPI)
1542 return false;
1543 unsigned Opc = VPI->getOpcode();
1545 return true;
1546 switch (Opc) {
1550 case Instruction::Load:
1551 return true;
1552 default:
1553 return false;
1554 }
1555 }
1556
1557 static inline bool classof(const VPUser *R) {
1559 }
1560
1561 VPInstruction *clone() override {
1562 auto *New =
1564 *this, *this, getDebugLoc(), getName());
1565 New->setUnderlyingValue(getUnderlyingValue());
1566 return New;
1567 }
1568
1569 void execute(VPTransformState &State) override;
1570
1571 /// Return the cost of this VPInstruction.
1573 VPCostContext &Ctx) const override {
1574 // TODO: Compute accurate cost after retiring the legacy cost model.
1575 return 0;
1576 }
1577
1578 Type *getResultType() const { return ResultTy; }
1579
1580protected:
1581#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1582 /// Print the recipe.
1583 void printRecipe(raw_ostream &O, const Twine &Indent,
1584 VPSlotTracker &SlotTracker) const override;
1585#endif
1586};
1587
1588/// Helper type to provide functions to access incoming values and blocks for
1589/// phi-like recipes.
1591protected:
1592 /// Return a VPRecipeBase* to the current object.
1593 virtual const VPRecipeBase *getAsRecipe() const = 0;
1594
1595public:
1596 virtual ~VPPhiAccessors() = default;
1597
1598 /// Returns the incoming VPValue with index \p Idx.
1599 VPValue *getIncomingValue(unsigned Idx) const {
1600 return getAsRecipe()->getOperand(Idx);
1601 }
1602
1603 /// Returns the incoming block with index \p Idx.
1604 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1605
1606 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1607 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1608
1609 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1610 /// block.
1611 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1612
1613 /// Returns the number of incoming values, also number of incoming blocks.
1614 virtual unsigned getNumIncoming() const {
1615 return getAsRecipe()->getNumOperands();
1616 }
1617
1618 /// Returns an iterator range over the incoming values.
1620 return make_range(getAsRecipe()->op_begin(),
1621 getAsRecipe()->op_begin() + getNumIncoming());
1622 }
1623
1625 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1626
1627 /// Returns an iterator range over the incoming blocks.
1629 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1630 return getIncomingBlock(Idx);
1631 };
1632 return map_range(index_range(0, getNumIncoming()), GetBlock);
1633 }
1634
1635 /// Returns an iterator range over pairs of incoming values and corresponding
1636 /// incoming blocks.
1642
1643 /// Removes the incoming value for \p IncomingBlock, which must be a
1644 /// predecessor.
1645 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1646
1647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1648 /// Print the recipe.
1650#endif
1651};
1652
1655 const Twine &Name = "")
1656 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name) {}
1657
1658 static inline bool classof(const VPUser *U) {
1659 auto *VPI = dyn_cast<VPInstruction>(U);
1660 return VPI && VPI->getOpcode() == Instruction::PHI;
1661 }
1662
1663 static inline bool classof(const VPValue *V) {
1664 auto *VPI = dyn_cast<VPInstruction>(V);
1665 return VPI && VPI->getOpcode() == Instruction::PHI;
1666 }
1667
1668 static inline bool classof(const VPSingleDefRecipe *SDR) {
1669 auto *VPI = dyn_cast<VPInstruction>(SDR);
1670 return VPI && VPI->getOpcode() == Instruction::PHI;
1671 }
1672
1673 VPPhi *clone() override {
1674 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1675 PhiR->setUnderlyingValue(getUnderlyingValue());
1676 return PhiR;
1677 }
1678
1679 void execute(VPTransformState &State) override;
1680
1681protected:
1682#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1683 /// Print the recipe.
1684 void printRecipe(raw_ostream &O, const Twine &Indent,
1685 VPSlotTracker &SlotTracker) const override;
1686#endif
1687
1688 const VPRecipeBase *getAsRecipe() const override { return this; }
1689};
1690
1691 /// A recipe to wrap an original IR instruction not to be modified during
1692 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1693 /// Except for PHIs, VPIRInstructions cannot have any operands.
1695 Instruction &I;
1696
1697protected:
1698 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1699 /// subclasses may need to be created, e.g. VPIRPhi.
1701 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1702
1703public:
1704 ~VPIRInstruction() override = default;
1705
1706 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1707 /// VPIRInstruction.
1709
1710 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1711
1713 auto *R = create(I);
1714 for (auto *Op : operands())
1715 R->addOperand(Op);
1716 return R;
1717 }
1718
1719 void execute(VPTransformState &State) override;
1720
1721 /// Return the cost of this VPIRInstruction.
1723 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1724
1725 Instruction &getInstruction() const { return I; }
1726
1727 bool usesScalars(const VPValue *Op) const override {
1729 "Op must be an operand of the recipe");
1730 return true;
1731 }
1732
1733 bool usesFirstPartOnly(const VPValue *Op) const override {
1735 "Op must be an operand of the recipe");
1736 return true;
1737 }
1738
1739 bool usesFirstLaneOnly(const VPValue *Op) const override {
1741 "Op must be an operand of the recipe");
1742 return true;
1743 }
1744
1745protected:
1746#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1747 /// Print the recipe.
1748 void printRecipe(raw_ostream &O, const Twine &Indent,
1749 VPSlotTracker &SlotTracker) const override;
1750#endif
1751};
1752
1753 /// An overlay for VPIRInstructions wrapping PHI nodes, enabling convenient use
1754 /// of cast/dyn_cast/isa and an execute() implementation. A single VPValue
1755 /// operand is allowed, and it is used to add a new incoming value for the
1756 /// single predecessor VPBB.
1758 public VPPhiAccessors {
1760
1761 static inline bool classof(const VPRecipeBase *U) {
1762 auto *R = dyn_cast<VPIRInstruction>(U);
1763 return R && isa<PHINode>(R->getInstruction());
1764 }
1765
1766 static inline bool classof(const VPUser *U) {
1767 auto *R = dyn_cast<VPRecipeBase>(U);
1768 return R && classof(R);
1769 }
1770
1772
1773 void execute(VPTransformState &State) override;
1774
1775protected:
1776#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1777 /// Print the recipe.
1778 void printRecipe(raw_ostream &O, const Twine &Indent,
1779 VPSlotTracker &SlotTracker) const override;
1780#endif
1781
1782 const VPRecipeBase *getAsRecipe() const override { return this; }
1783};
1784
1785/// VPWidenRecipe is a recipe for producing a widened instruction using the
1786/// opcode and operands of the recipe. This recipe covers most of the
1787/// traditional vectorization cases where each recipe transforms into a
1788/// vectorized version of itself.
1790 public VPIRMetadata {
1791 unsigned Opcode;
1792
1793public:
1795 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1796 DebugLoc DL = {})
1797 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1798 VPIRMetadata(Metadata), Opcode(I.getOpcode()) {
1799 setUnderlyingValue(&I);
1800 }
1801
1802 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1803 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1804 DebugLoc DL = {})
1805 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands, Flags, DL),
1806 VPIRMetadata(Metadata), Opcode(Opcode) {}
1807
1808 ~VPWidenRecipe() override = default;
1809
1811
1813 if (auto *UV = getUnderlyingValue())
1814 return new VPWidenRecipe(*cast<Instruction>(UV), NewOperands, *this,
1815 *this, getDebugLoc());
1816 return new VPWidenRecipe(Opcode, NewOperands, *this, *this, getDebugLoc());
1817 }
1818
1819 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1820
1821 /// Produce a widened instruction using the opcode and operands of the recipe,
1822 /// processing State.VF elements.
1823 void execute(VPTransformState &State) override;
1824
1825 /// Return the cost of this VPWidenRecipe.
1826 InstructionCost computeCost(ElementCount VF,
1827 VPCostContext &Ctx) const override;
1828
1829 unsigned getOpcode() const { return Opcode; }
1830
1831protected:
1832#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1833 /// Print the recipe.
1834 void printRecipe(raw_ostream &O, const Twine &Indent,
1835 VPSlotTracker &SlotTracker) const override;
1836#endif
1837
1838 /// Returns true if the recipe only uses the first lane of operand \p Op.
1839 bool usesFirstLaneOnly(const VPValue *Op) const override {
1841 "Op must be an operand of the recipe");
1842 return Opcode == Instruction::Select && Op == getOperand(0) &&
1843 Op->isDefinedOutsideLoopRegions();
1844 }
1845};
1846
1847/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1849 /// Cast instruction opcode.
1850 Instruction::CastOps Opcode;
1851
1852public:
1854 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1855 const VPIRMetadata &Metadata = {},
1857 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, ResultTy, Flags,
1858 DL),
1859 VPIRMetadata(Metadata), Opcode(Opcode) {
1860 assert(flagsValidForOpcode(Opcode) &&
1861 "Set flags not supported for the provided opcode");
1863 "Opcode requires specific flags to be set");
1865 }
1866
1867 ~VPWidenCastRecipe() override = default;
1868
1870 return new VPWidenCastRecipe(Opcode, getOperand(0), getScalarType(),
1872 *this, *this, getDebugLoc());
1873 }
1874
1875 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1876
1877 /// Produce widened copies of the cast.
1878 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1879
1880 /// Return the cost of this VPWidenCastRecipe.
1882 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1883
1884 Instruction::CastOps getOpcode() const { return Opcode; }
1885
1886protected:
1887#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1888 /// Print the recipe.
1889 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1890 VPSlotTracker &SlotTracker) const override;
1891#endif
1892};
1893
1894/// A recipe for widening vector intrinsics.
1896 /// ID of the vector intrinsic to widen.
1897 Intrinsic::ID VectorIntrinsicID;
1898
1899 /// True if the intrinsic may read from memory.
1900 bool MayReadFromMemory;
1901
1902 /// True if the intrinsic may write to memory.
1903 bool MayWriteToMemory;
1904
1905 /// True if the intrinsic may have side-effects.
1906 bool MayHaveSideEffects;
1907
1908protected:
1909 VPWidenIntrinsicRecipe(const unsigned char SC,
1910 Intrinsic::ID VectorIntrinsicID,
1911 ArrayRef<VPValue *> CallArguments, Type *Ty,
1912 const VPIRFlags &Flags = {},
1913 const VPIRMetadata &MD = {},
1915 : VPRecipeWithIRFlags(SC, CallArguments, Ty, Flags, DL), VPIRMetadata(MD),
1916 VectorIntrinsicID(VectorIntrinsicID) {
1917 LLVMContext &Ctx = Ty->getContext();
1918 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1919 MemoryEffects ME = Attrs.getMemoryEffects();
1920 MayReadFromMemory = !ME.onlyWritesMemory();
1921 MayWriteToMemory = !ME.onlyReadsMemory();
1922 MayHaveSideEffects = MayWriteToMemory ||
1923 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1924 !Attrs.hasAttribute(Attribute::WillReturn);
1925 }
1926
1927 /// Helper function to produce the widened intrinsic call.
1928 CallInst *createVectorCall(VPTransformState &State);
1929
1930public:
1932 ArrayRef<VPValue *> CallArguments, Type *Ty,
1933 const VPIRFlags &Flags = {},
1934 const VPIRMetadata &MD = {},
1936 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, Ty,
1937 Flags, DL),
1938 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID),
1939 MayReadFromMemory(CI.mayReadFromMemory()),
1940 MayWriteToMemory(CI.mayWriteToMemory()),
1941 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1942 setUnderlyingValue(&CI);
1943 }
1944
1946 ArrayRef<VPValue *> CallArguments, Type *Ty,
1947 const VPIRFlags &Flags = {},
1948 const VPIRMetadata &Metadata = {},
1950 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenIntrinsicSC,
1951 VectorIntrinsicID, CallArguments, Ty, Flags,
1952 Metadata, DL) {}
1953
1954 ~VPWidenIntrinsicRecipe() override = default;
1955
1957 if (Value *CI = getUnderlyingValue())
1958 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1959 operands(), getScalarType(), *this,
1960 *this, getDebugLoc());
1961 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(),
1962 getScalarType(), *this, *this,
1963 getDebugLoc());
1964 }
1965
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// recipes whose ID is either VPWidenIntrinsicSC or VPWidenMemIntrinsicSC.
1966 static inline bool classof(const VPRecipeBase *R) {
1967 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1968 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC;
1969 }
1970
/// A VPUser matches when it is a VPRecipeBase that the VPRecipeBase overload
/// of classof accepts.
1971 static inline bool classof(const VPUser *U) {
1972 auto *R = dyn_cast<VPRecipeBase>(U);
1973 return R && classof(R);
1974 }
1975
/// A VPValue matches when it has a defining recipe that the VPRecipeBase
/// overload of classof accepts; values without a defining recipe never match.
1976 static inline bool classof(const VPValue *V) {
1977 auto *R = V->getDefiningRecipe();
1978 return R && classof(R);
1979 }
1980
/// Forward to the VPRecipeBase overload of classof.
1981 static inline bool classof(const VPSingleDefRecipe *R) {
1982 return classof(static_cast<const VPRecipeBase *>(R));
1983 }
1984
1985 /// Produce a widened version of the vector intrinsic.
1986 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1987
1988 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
1991 const VPRecipeWithIRFlags &R,
1992 ElementCount VF, VPCostContext &Ctx);
1993
1994 /// Return the cost of this vector intrinsic.
1996 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1997
1998 /// Return the ID of the intrinsic.
1999 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
2000
2001 /// Return the name of the intrinsic as a string.
2003
2004 /// Returns true if the intrinsic may read from memory.
2005 bool mayReadFromMemory() const { return MayReadFromMemory; }
2006
2007 /// Returns true if the intrinsic may write to memory.
2008 bool mayWriteToMemory() const { return MayWriteToMemory; }
2009
2010 /// Returns true if the intrinsic may have side-effects.
2011 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
2012
2013 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
2014
2015protected:
2016#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2017 /// Print the recipe.
2018 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
2019 VPSlotTracker &SlotTracker) const override;
2020#endif
2021};
2022
2023/// A recipe for widening vector memory intrinsics.
2025 /// Alignment information for this memory access.
2026 Align Alignment;
2027
2028public:
2029 // TODO: support StoreInst for strided store
2031 ArrayRef<VPValue *> CallArguments, Type *Ty,
2032 Align Alignment, const VPIRMetadata &MD = {},
2034 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenMemIntrinsicSC,
2035 VectorIntrinsicID, CallArguments, Ty, {}, MD,
2036 DL),
2037 Alignment(Alignment) {
2038 assert(VectorIntrinsicID == Intrinsic::experimental_vp_strided_load &&
2039 "Unexpected intrinsic");
2040 }
2041
2042 ~VPWidenMemIntrinsicRecipe() override = default;
2043
2046 getScalarType(), Alignment, *this,
2047 getDebugLoc());
2048 }
2049
2050 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenMemIntrinsicSC)
2051
2052 /// Produce a widened version of the vector memory intrinsic.
2053 void execute(VPTransformState &State) override;
2054
2055 /// Helper function for computing the cost of vector memory intrinsic.
2057 bool IsMasked, Align Alignment,
2058 VPCostContext &Ctx);
2059
2060 /// Return the cost of this vector memory intrinsic.
2062 VPCostContext &Ctx) const override;
2063};
2064
2065/// A recipe for widening Call instructions using library calls.
2067 public VPIRMetadata {
2068 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
2069 /// between a given VF and the chosen vectorized variant, so there will be a
2070 /// different VPlan for each VF with a valid variant.
2071 Function *Variant;
2072
2073public:
2075 ArrayRef<VPValue *> CallArguments,
2076 const VPIRFlags &Flags = {},
2077 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2078 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments,
2079 toScalarizedTy(Variant->getReturnType()), Flags,
2080 DL),
2081 VPIRMetadata(Metadata), Variant(Variant) {
2082 setUnderlyingValue(UV);
2083 assert(
2084 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2085 "last operand must be the called function");
2086 assert(cast<Function>(CallArguments.back()->getLiveInIRValue())
2087 ->getReturnType() == getScalarType() &&
2088 "Scalar type must match return type of called scalar function");
2089 }
2090
2091 ~VPWidenCallRecipe() override = default;
2092
2094 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2095 *this, *this, getDebugLoc());
2096 }
2097
2098 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2099
2100 /// Produce a widened version of the call instruction.
2101 void execute(VPTransformState &State) override;
2102
2103 /// Return the cost of this VPWidenCallRecipe.
2104 InstructionCost computeCost(ElementCount VF,
2105 VPCostContext &Ctx) const override;
2106
2107 /// Return the cost of widening a call using the vector function \p Variant.
2108 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2109
2113
2116
2117 /// Returns true if the recipe only uses the first lane of operand \p Op.
2118 bool usesFirstLaneOnly(const VPValue *Op) const override;
2119
2120protected:
2121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2122 /// Print the recipe.
2123 void printRecipe(raw_ostream &O, const Twine &Indent,
2124 VPSlotTracker &SlotTracker) const override;
2125#endif
2126};
2127
2128/// A recipe representing a sequence of load -> update -> store as part of
2129/// a histogram operation. This means there may be aliasing between vector
2130/// lanes, which is handled by the llvm.experimental.vector.histogram family
2131/// of intrinsics. The only update operations currently supported are
2132/// 'add' and 'sub' where the other term is loop-invariant.
2134 /// Opcode of the update operation, currently either add or sub.
2135 unsigned Opcode;
2136
2137public:
2138 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2140 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2141 Opcode(Opcode) {}
2142
2143 ~VPHistogramRecipe() override = default;
2144
2146 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
2147 }
2148
2149 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2150
2151 /// Produce a vectorized histogram operation.
2152 void execute(VPTransformState &State) override;
2153
2154 /// Return the cost of this VPHistogramRecipe.
2156 VPCostContext &Ctx) const override;
2157
/// Return the opcode of the update operation.
2158 unsigned getOpcode() const { return Opcode; }
2159
2160 /// Return the mask operand (the third operand) if one was provided, or a
2161 /// null pointer if all lanes should be executed unconditionally.
2162 VPValue *getMask() const {
2163 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2164 }
2165
2166protected:
2167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2168 /// Print the recipe.
2169 void printRecipe(raw_ostream &O, const Twine &Indent,
2170 VPSlotTracker &SlotTracker) const override;
2171#endif
2172};
2173
2174/// A recipe for handling GEP instructions.
2176 Type *SourceElementTy;
2177
/// Return true if operand 0 reports being defined outside loop regions.
2178 bool isPointerLoopInvariant() const {
2179 return getOperand(0)->isDefinedOutsideLoopRegions();
2180 }
2181
/// Return true if index \p I reports being defined outside loop regions.
/// Index \p I is read from operand I + 1.
2182 bool isIndexLoopInvariant(unsigned I) const {
2183 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2184 }
2185
2186public:
2188 const VPIRFlags &Flags = {},
2190 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands,
2191 getScalarTypeOrInfer(Operands[0]), Flags, DL),
2192 SourceElementTy(GEP->getSourceElementType()) {
2193 setUnderlyingValue(GEP);
2195 (void)Metadata;
2197 assert(Metadata.empty() && "unexpected metadata on GEP");
2198 }
2199
2200 ~VPWidenGEPRecipe() override = default;
2201
2204 operands(), *this, getDebugLoc());
2205 }
2206
2207 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2208
2209 /// This recipe generates a GEP instruction.
2210 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2211
2212 /// Generate the gep nodes.
2213 void execute(VPTransformState &State) override;
2214
/// Return the source element type, which the constructor takes from the
/// underlying GEP.
2215 Type *getSourceElementType() const { return SourceElementTy; }
2216
2217 /// Return the cost of this VPWidenGEPRecipe.
2219 VPCostContext &Ctx) const override {
2220 // TODO: Compute accurate cost after retiring the legacy cost model.
2221 return 0;
2222 }
2223
2224 /// Returns true if the recipe only uses the first lane of operand \p Op.
2225 bool usesFirstLaneOnly(const VPValue *Op) const override;
2226
2227protected:
2228#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2229 /// Print the recipe.
2230 void printRecipe(raw_ostream &O, const Twine &Indent,
2231 VPSlotTracker &SlotTracker) const override;
2232#endif
2233};
2234
2235/// A recipe to compute a pointer to the last element of each part of a widened
2236/// memory access for widened memory accesses of SourceElementTy. Used for
2237/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2238/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2239/// unroller otherwise.
2241 Type *SourceElementTy;
2242
2243 /// The constant stride of the pointer computed by this recipe, expressed in
2244 /// units of SourceElementTy.
2245 int64_t Stride;
2246
2247public:
/// Create a recipe with operands \p Ptr and \p VF, recording
/// \p SourceElementTy and \p Stride. \p Stride must be negative. The result
/// type is obtained from \p Ptr via getScalarTypeOrInfer.
2248 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2249 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2250 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2251 getScalarTypeOrInfer(Ptr), GEPFlags, DL),
2252 SourceElementTy(SourceElementTy), Stride(Stride) {
2253 assert(Stride < 0 && "Stride must be negative");
2254 }
2255
2256 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2257
/// Return the element type in which the stride is expressed.
2258 Type *getSourceElementType() const { return SourceElementTy; }
/// Return the constant stride, in units of the source element type.
2259 int64_t getStride() const { return Stride; }
/// Return the pointer operand (operand 0).
2260 VPValue *getPointer() const { return getOperand(0); }
/// Return the VF operand (operand 1).
2261 VPValue *getVFValue() const { return getOperand(1); }
2263 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2264 }
2265
2266 /// Adds the offset operand to the recipe.
2267 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2268 void materializeOffset(unsigned Part = 0);
2269
2270 void execute(VPTransformState &State) override;
2271
2272 bool usesFirstLaneOnly(const VPValue *Op) const override {
2274 "Op must be an operand of the recipe");
2275 return true;
2276 }
2277
2278 /// Return the cost of this VPVectorEndPointerRecipe.
2280 VPCostContext &Ctx) const override {
2281 // TODO: Compute accurate cost after retiring the legacy cost model.
2282 return 0;
2283 }
2284
2285 /// Returns true if the recipe only uses the first part of operand \p Op.
2286 bool usesFirstPartOnly(const VPValue *Op) const override {
2288 "Op must be an operand of the recipe");
2289 assert(getNumOperands() <= 2 && "must have at most two operands");
2290 return true;
2291 }
2292
2294 auto *VEPR = new VPVectorEndPointerRecipe(
2297 if (auto *Offset = getOffset())
2298 VEPR->addOperand(Offset);
2299 return VEPR;
2300 }
2301
2302protected:
2303#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2304 /// Print the recipe.
2305 void printRecipe(raw_ostream &O, const Twine &Indent,
2306 VPSlotTracker &SlotTracker) const override;
2307#endif
2308};
2309
2310/// A recipe to compute the pointers for widened memory accesses of \p
2311/// SourceElementTy, with the \p Stride expressed in units of \p
2312/// SourceElementTy. Unrolling adds an extra \p VFxPart operand for unrolled
2313/// parts > 0 and it produces `GEP SourceElementTy Ptr, VFxPart * Stride`.
2315 Type *SourceElementTy;
2316
2317public:
/// Create a recipe with operands \p Ptr and \p Stride, recording
/// \p SourceElementTy. The result type is obtained from \p Ptr via
/// getScalarTypeOrInfer.
2318 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
2319 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2320 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC,
2321 ArrayRef<VPValue *>({Ptr, Stride}),
2322 getScalarTypeOrInfer(Ptr), GEPFlags, DL),
2323 SourceElementTy(SourceElementTy) {}
2324
2325 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2326
/// Return the stride operand (operand 1).
2327 VPValue *getStride() const { return getOperand(1); }
2328
2330 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2331 }
2332
2333 void execute(VPTransformState &State) override;
2334
/// Return the source element type recorded at construction.
2335 Type *getSourceElementType() const { return SourceElementTy; }
2336
2337 bool usesFirstLaneOnly(const VPValue *Op) const override {
2339 "Op must be an operand of the recipe");
2340 return true;
2341 }
2342
2343 /// Returns true if the recipe only uses the first part of operand \p Op.
2344 bool usesFirstPartOnly(const VPValue *Op) const override {
2346 "Op must be an operand of the recipe");
2347 assert(getNumOperands() <= 2 && "must have at most two operands");
2348 return true;
2349 }
2350
2352 auto *Clone =
2353 new VPVectorPointerRecipe(getOperand(0), SourceElementTy, getStride(),
2355 if (auto *VFxPart = getVFxPart())
2356 Clone->addOperand(VFxPart);
2357 return Clone;
2358 }
2359
2360 /// Return the cost of this VPVectorPointerRecipe.
2362 VPCostContext &Ctx) const override {
2363 // TODO: Compute accurate cost after retiring the legacy cost model.
2364 return 0;
2365 }
2366
2367protected:
2368#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2369 /// Print the recipe.
2370 void printRecipe(raw_ostream &O, const Twine &Indent,
2371 VPSlotTracker &SlotTracker) const override;
2372#endif
2373};
2374
2375/// A pure virtual base class for all recipes modeling header phis, including
2376/// phis for first order recurrences, pointer inductions and reductions. The
2377/// start value is the first operand of the recipe and the incoming value from
2378/// the backedge is the second operand.
2379///
2380/// Inductions are modeled using the following sub-classes:
2381/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2382/// floating point inductions with arbitrary start and step values. Produces
2383/// a vector PHI per-part.
2384/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2385/// pointer induction. Produces either a vector PHI per-part or scalar values
2386/// per-lane based on the canonical induction.
2387/// * VPFirstOrderRecurrencePHIRecipe
2388/// * VPReductionPHIRecipe
2389/// * VPActiveLaneMaskPHIRecipe
2390/// * VPEVLBasedIVPHIRecipe
2391///
2392/// Note that the canonical IV is modeled as a VPRegionValue associated with
2393/// its loop region.
2395 public VPPhiAccessors {
2396protected:
/// Create a header phi whose result type is obtained from \p Start via
/// getScalarTypeOrInfer. Delegates to the constructor taking an explicit
/// result type.
2397 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2398 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2399 : VPHeaderPHIRecipe(VPRecipeID, UnderlyingInstr, Start,
2400 getScalarTypeOrInfer(Start), DL) {}
2401
/// Create a header phi with recipe ID \p VPRecipeID, forwarding \p Start,
/// \p ResultTy, \p UnderlyingInstr and \p DL to VPSingleDefRecipe.
2402 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2403 VPValue *Start, Type *ResultTy, DebugLoc DL)
2404 : VPSingleDefRecipe(VPRecipeID, Start, ResultTy, UnderlyingInstr, DL) {}
2405
/// Return this object as a VPRecipeBase.
2406 const VPRecipeBase *getAsRecipe() const override { return this; }
2407
2408public:
2409 ~VPHeaderPHIRecipe() override = default;
2410
2411 /// Method to support type inquiry through isa, cast, and dyn_cast.
/// A recipe matches when its ID lies in the inclusive range
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
2412 static inline bool classof(const VPRecipeBase *R) {
2413 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2414 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2415 }
/// Type inquiry for a VPValue: matches when the value's defining recipe is a
/// header phi. getDefiningRecipe() may return null, and plain isa<> asserts
/// on a null pointer, so use the null-tolerant form; such values never match.
2416 static inline bool classof(const VPValue *V) {
2417 return isa_and_present<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2418 }
/// Type inquiry for a VPSingleDefRecipe: view it as a VPRecipeBase and apply
/// the recipe-based check.
2419 static inline bool classof(const VPSingleDefRecipe *R) {
2420 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2421 }
2422
2423 /// Generate the phi nodes.
2424 void execute(VPTransformState &State) override = 0;
2425
2426 /// Return the cost of this header phi recipe.
2428 VPCostContext &Ctx) const override;
2429
2430 /// Returns the start value of the phi, if one is set.
2432 return getNumOperands() == 0 ? nullptr : getOperand(0);
2433 }
2435 return getNumOperands() == 0 ? nullptr : getOperand(0);
2436 }
2437
2438 /// Update the start value of the recipe.
2440
2441 /// Returns the incoming value from the loop backedge.
2443 return getOperand(1);
2444 }
2445
2446 /// Update the incoming value from the loop backedge.
2448
2449 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2450 /// to be a recipe.
2452 return *getBackedgeValue()->getDefiningRecipe();
2453 }
2454
2455protected:
2456#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2457 /// Print the recipe.
2458 void printRecipe(raw_ostream &O, const Twine &Indent,
2459 VPSlotTracker &SlotTracker) const override = 0;
2460#endif
2461};
2462
2463/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2464/// VPWidenPointerInductionRecipe), providing shared functionality, including
2465/// retrieving the step value, induction descriptor and original phi node.
2467 InductionDescriptor IndDesc;
2468
2469public:
/// Create a widened induction whose result type is obtained from \p Start via
/// getScalarTypeOrInfer. Delegates to the constructor taking an explicit
/// result type.
2470 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2471 VPValue *Step, const InductionDescriptor &IndDesc,
2472 DebugLoc DL)
2473 : VPWidenInductionRecipe(Kind, IV, Start, Step, IndDesc,
2474 getScalarTypeOrInfer(Start), DL) {}
2475
/// Create a widened induction of type \p ResultTy for \p IV. \p Start is
/// passed to the header-phi base and \p Step is then appended as an operand;
/// \p IndDesc is copied into the recipe.
2476 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2477 VPValue *Step, const InductionDescriptor &IndDesc,
2478 Type *ResultTy, DebugLoc DL)
2479 : VPHeaderPHIRecipe(Kind, IV, Start, ResultTy, DL), IndDesc(IndDesc) {
2480 addOperand(Step);
2481 }
2482
/// Method to support type inquiry through isa, cast, and dyn_cast. Accepts
/// recipes whose ID is either VPWidenIntOrFpInductionSC or
/// VPWidenPointerInductionSC.
2483 static inline bool classof(const VPRecipeBase *R) {
2484 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2485 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2486 }
2487
/// A VPValue matches when it has a defining recipe that the VPRecipeBase
/// overload of classof accepts; values without a defining recipe never match.
2488 static inline bool classof(const VPValue *V) {
2489 auto *R = V->getDefiningRecipe();
2490 return R && classof(R);
2491 }
2492
/// Forward to the VPRecipeBase overload of classof.
2493 static inline bool classof(const VPSingleDefRecipe *R) {
2494 return classof(static_cast<const VPRecipeBase *>(R));
2495 }
2496
2497 void execute(VPTransformState &State) override = 0;
2498
2499 /// Returns the start value of the induction.
2501
2502 /// Returns the step value of the induction.
2504 const VPValue *getStepValue() const { return getOperand(1); }
2505
2506 /// Update the step value of the recipe.
2507 void setStepValue(VPValue *V) { setOperand(1, V); }
2508
2510 const VPValue *getVFValue() const { return getOperand(2); }
2511
2512 /// Returns the number of incoming values, also number of incoming blocks.
2513 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2514 /// incoming value, its start value.
2515 unsigned getNumIncoming() const override { return 1; }
2516
2517 /// Returns the underlying PHINode if one exists, or null otherwise.
2521
2522 /// Returns the induction descriptor for the recipe.
2523 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2524
2526 // TODO: All operands of base recipe must exist and be at same index in
2527 // derived recipe.
2529 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2530 }
2531
2533 // TODO: All operands of base recipe must exist and be at same index in
2534 // derived recipe.
2536 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2537 }
2538
2539 /// Returns true if the recipe only uses the first lane of operand \p Op.
2540 bool usesFirstLaneOnly(const VPValue *Op) const override {
2542 "Op must be an operand of the recipe");
2543 // The recipe creates its own wide start value, so it only requests the
2544 // first lane of the operand.
2545 // TODO: Remove once creating the start value is modeled separately.
2546 return Op == getStartValue() || Op == getStepValue();
2547 }
2548};
2549
2550/// A recipe for handling phi nodes of integer and floating-point inductions,
2551/// producing their vector values. This is an abstract recipe and must be
2552/// converted to concrete recipes before executing.
2554 public VPIRFlags {
2555 TruncInst *Trunc;
2556
2557 // Start, Step and VF are always operands; unrolling adds 2 more (5 total).
2558 bool isUnrolled() const { return getNumOperands() == 5; }
2559
2560public:
2562 VPValue *VF, const InductionDescriptor &IndDesc,
2563 const VPIRFlags &Flags, DebugLoc DL)
2564 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2565 Start, Step, IndDesc, DL),
2566 VPIRFlags(Flags), Trunc(nullptr) {
2567 addOperand(VF);
2568 }
2569
2571 VPValue *VF, const InductionDescriptor &IndDesc,
2572 TruncInst *Trunc, const VPIRFlags &Flags,
2573 DebugLoc DL)
2574 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2575 Start, Step, IndDesc,
2576 Trunc ? Trunc->getType() : Start->getType(), DL),
2577 VPIRFlags(Flags), Trunc(Trunc) {
2578 addOperand(VF);
2580 if (Trunc)
2582 assert(Metadata.empty() && "unexpected metadata on Trunc");
2583 }
2584
2586
2592
2593 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2594
/// Not executable: reaching this is a bug, as the recipe is expected to have
/// been expanded instead.
2595 void execute(VPTransformState &State) override {
2596 llvm_unreachable("cannot execute this recipe, should be expanded via "
2597 "expandVPWidenIntOrFpInductionRecipe");
2598 }
2599
2600 /// Returns the start value of the induction.
2602
2603 /// If the recipe has been unrolled, return the VPValue for the induction
2604 /// increment, otherwise return null.
2606 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2607 }
2608
2609 /// Returns the number of incoming values, also number of incoming blocks.
2610 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2611 /// incoming value, its start value.
2612 unsigned getNumIncoming() const override { return 1; }
2613
2614 /// Returns the TruncInst passed at construction, or nullptr if the recipe
2615 /// was created without one.
2616 TruncInst *getTruncInst() { return Trunc; }
2617 const TruncInst *getTruncInst() const { return Trunc; }
2618
2619 /// Returns true if the induction is canonical, i.e. starting at 0 and
2620 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2621 /// same type as the canonical induction.
2622 bool isCanonical() const;
2623
2624 /// Returns the VPValue representing the value of this induction at
2625 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2626 /// take place.
2628 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2629 }
2630
2631protected:
2632#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2633 /// Print the recipe.
2634 void printRecipe(raw_ostream &O, const Twine &Indent,
2635 VPSlotTracker &SlotTracker) const override;
2636#endif
2637};
2638
2640public:
2641 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2642 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2643 /// VF*UF.
2645 VPValue *NumUnrolledElems,
2646 const InductionDescriptor &IndDesc, DebugLoc DL)
2647 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2648 Start, Step, IndDesc, DL) {
2649 addOperand(NumUnrolledElems);
2650 }
2651
2653
2659
2660 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2661
2662 /// Not executable: the recipe is expected to have been expanded instead.
2663 void execute(VPTransformState &State) override {
2664 llvm_unreachable("cannot execute this recipe, should be expanded via "
2665 "expandVPWidenPointerInduction");
2666 };
2667
2668 /// Returns true if only scalar values will be generated.
2669 bool onlyScalarsGenerated(bool IsScalable);
2670
2671protected:
2672#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2673 /// Print the recipe.
2674 void printRecipe(raw_ostream &O, const Twine &Indent,
2675 VPSlotTracker &SlotTracker) const override;
2676#endif
2677};
2678
2679/// A recipe for widened phis. Incoming values are operands of the recipe and
2680/// their operand index corresponds to the incoming predecessor block. If the
2681/// recipe is placed in an entry block to a (non-replicate) region, it must have
2682/// exactly 2 incoming values, the first from the predecessor of the region and
2683/// the second from the exiting block of the region.
2685 public VPPhiAccessors {
2686 /// Name to use for the generated IR instruction for the widened phi.
2687 std::string Name;
2688
2689public:
2690 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2691 /// debug location \p DL and \p Name.
2693 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2694 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues,
2695 getScalarTypeOrInfer(IncomingValues[0]),
2696 /*UV=*/nullptr, DL),
2697 Name(Name.str()) {}
2698
2700 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2701 }
2702
2703 ~VPWidenPHIRecipe() override = default;
2704
2705 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2706
2707 /// Generate the phi/select nodes.
2708 void execute(VPTransformState &State) override;
2709
2710 /// Return the cost of this VPWidenPHIRecipe.
2712 VPCostContext &Ctx) const override;
2713
2714protected:
2715#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2716 /// Print the recipe.
2717 void printRecipe(raw_ostream &O, const Twine &Indent,
2718 VPSlotTracker &SlotTracker) const override;
2719#endif
2720
2721 const VPRecipeBase *getAsRecipe() const override { return this; }
2722};
2723
2724/// A recipe for handling first-order recurrence phis. The start value is the
2725/// first operand of the recipe and the incoming value from the backedge is the
2726/// second operand.
2729 VPValue &BackedgeValue)
2730 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2731 &Start) {
2732 addOperand(&BackedgeValue);
2733 }
2734
2735 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2736
2741
2742 void execute(VPTransformState &State) override;
2743
2744 /// Return the cost of this first-order recurrence phi recipe.
2746 VPCostContext &Ctx) const override;
2747
2748 /// Returns true if the recipe only uses the first lane of operand \p Op.
2749 bool usesFirstLaneOnly(const VPValue *Op) const override {
2751 "Op must be an operand of the recipe");
2752 return Op == getStartValue();
2753 }
2754
2755protected:
2756#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2757 /// Print the recipe.
2758 void printRecipe(raw_ostream &O, const Twine &Indent,
2759 VPSlotTracker &SlotTracker) const override;
2760#endif
2761};
2762
2763/// Possible variants of a reduction.
2764
2765/// This reduction is ordered and in-loop.
2766struct RdxOrdered {};
2767/// This reduction is in-loop.
2768struct RdxInLoop {};
2769/// This reduction is unordered with the partial result scaled down by some
2770/// factor.
2773};
2774using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2775
/// Select the ReductionStyle for a reduction: RdxOrdered if \p Ordered,
/// otherwise RdxInLoop if \p InLoop, otherwise RdxUnordered carrying
/// \p ScaleFactor. \p Ordered requires \p InLoop.
2776 inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2777 unsigned ScaleFactor) {
2778 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2779 if (Ordered)
2780 return RdxOrdered{};
2781 if (InLoop)
2782 return RdxInLoop{};
2783 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2784 }
2785
2786/// A recipe for handling reduction phis. The start value is the first operand
2787/// of the recipe and the incoming value from the backedge is the second
2788/// operand.
2790 /// The recurrence kind of the reduction.
2791 const RecurKind Kind;
2792
2793 ReductionStyle Style;
2794
2795 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2796 /// patterns for argmin/argmax).
2797 /// TODO: Also support cases where the phi itself has a single use, but its
2798 /// compare has multiple uses.
2799 bool HasUsesOutsideReductionChain;
2800
2801public:
2802 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2804 VPValue &BackedgeValue, ReductionStyle Style,
2805 const VPIRFlags &Flags,
2806 bool HasUsesOutsideReductionChain = false)
2807 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2808 VPIRFlags(Flags), Kind(Kind), Style(Style),
2809 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2810 addOperand(&BackedgeValue);
2811 }
2812
2813 ~VPReductionPHIRecipe() override = default;
2814
2816 VPValue *BackedgeValue) {
2817 return new VPReductionPHIRecipe(
2819 *Start, *BackedgeValue, Style, *this, HasUsesOutsideReductionChain);
2820 }
2821
2825
2826 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2827
2828 /// Generate the phi/select nodes.
2829 void execute(VPTransformState &State) override;
2830
2831 /// Get the factor that the VF of this recipe's output should be scaled by, or
2832 /// 1 if it isn't scaled.
2833 unsigned getVFScaleFactor() const {
2834 auto *Partial = std::get_if<RdxUnordered>(&Style);
2835 return Partial ? Partial->VFScaleFactor : 1;
2836 }
2837
2838 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2839 /// > 1.
2840 void setVFScaleFactor(unsigned ScaleFactor) {
2841 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2842 Style = RdxUnordered{ScaleFactor};
2843 }
2844
2845 /// Returns the number of incoming values, also number of incoming blocks.
2846 /// Reduction phis have two incoming values: the start value and the value
2847 /// coming from the loop backedge.
2848 unsigned getNumIncoming() const override { return 2; }
2849
2850 /// Returns the recurrence kind of the reduction.
2851 RecurKind getRecurrenceKind() const { return Kind; }
2852
2853 /// Returns true, if the phi is part of an ordered reduction.
2854 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2855
2856 /// Returns true if the phi is part of an in-loop reduction. Ordered
/// reductions are reported as in-loop as well.
2857 bool isInLoop() const {
2858 return std::holds_alternative<RdxInLoop>(Style) ||
2859 std::holds_alternative<RdxOrdered>(Style);
2860 }
2861
2862 /// Returns true if the reduction outputs a vector with a scaled down VF.
2863 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2864
2865 /// Returns true, if the phi is part of a multi-use reduction.
2867 return HasUsesOutsideReductionChain;
2868 }
2869
2870 /// Returns true if the recipe only uses the first lane of operand \p Op.
2871 bool usesFirstLaneOnly(const VPValue *Op) const override {
2873 "Op must be an operand of the recipe");
2874 return isOrdered() || isInLoop();
2875 }
2876
2877protected:
2878#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2879 /// Print the recipe.
2880 void printRecipe(raw_ostream &O, const Twine &Indent,
2881 VPSlotTracker &SlotTracker) const override;
2882#endif
2883};
2884
2885/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2886/// instructions.
2888public:
2889 /// The blend operation is a User of the incoming values and of their
2890 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2891 /// be omitted (implied by passing an odd number of operands) in which case
2892 /// all other incoming values are merged into it.
2894 const VPIRFlags &Flags, DebugLoc DL)
2895 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands, Flags, DL) {
2896 assert(Operands.size() >= 2 && "Expected at least two operands!");
2897 setUnderlyingValue(Phi);
2898 }
2899
2901
2904 NewOperands, *this, getDebugLoc());
2905 }
2906
2907 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2908
2909 /// A normalized blend is one that has an odd number of operands, whereby the
2910 /// first operand does not have an associated mask.
2911 bool isNormalized() const { return getNumOperands() % 2; }
2912
2913 /// Return the number of incoming values, taking into account when normalized
2914 /// the first incoming value will have no mask.
2915 unsigned getNumIncomingValues() const {
2916 return (getNumOperands() + isNormalized()) / 2;
2917 }
2918
2919 /// Return incoming value number \p Idx.
2920 VPValue *getIncomingValue(unsigned Idx) const {
2921 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2922 }
2923
2924 /// Return mask number \p Idx.
2925 VPValue *getMask(unsigned Idx) const {
2926 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2927 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2928 }
2929
2930 /// Set mask number \p Idx to \p V.
2931 void setMask(unsigned Idx, VPValue *V) {
2932 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2933 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
2934 }
2935
2936 void execute(VPTransformState &State) override {
2937 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2938 }
2939
2940 /// Return the cost of this VPBlendRecipe.
2941 InstructionCost computeCost(ElementCount VF,
2942 VPCostContext &Ctx) const override;
2943
2944 /// Returns true if the recipe only uses the first lane of operand \p Op.
2945 bool usesFirstLaneOnly(const VPValue *Op) const override;
2946
2947protected:
2948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2949 /// Print the recipe.
2950 void printRecipe(raw_ostream &O, const Twine &Indent,
2951 VPSlotTracker &SlotTracker) const override;
2952#endif
2953};
2954
2955/// A common base class for interleaved memory operations.
2956/// An Interleaved memory operation is a memory access method that combines
2957/// multiple strided loads/stores into a single wide load/store with shuffles.
2958/// The first operand is the start address. The optional operands are, in order,
2959/// the stored values and the mask.
2961 public VPIRMetadata {
2963
2964 /// Indicates if the interleave group is in a conditional block and requires a
2965 /// mask.
2966 bool HasMask = false;
2967
2968 /// Indicates if gaps between members of the group need to be masked out or if
2969 /// unused gaps can be loaded speculatively.
2970 bool NeedsMaskForGaps = false;
2971
2972protected:
2973 VPInterleaveBase(const unsigned char SC,
2975 ArrayRef<VPValue *> Operands,
2976 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2977 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
2978 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
2979 NeedsMaskForGaps(NeedsMaskForGaps) {
2980 // TODO: extend the masked interleaved-group support to reversed access.
2981 assert((!Mask || !IG->isReverse()) &&
2982 "Reversed masked interleave-group not supported.");
2983 if (StoredValues.empty()) {
2984 for (Instruction *Inst : IG->members()) {
2985 assert(!Inst->getType()->isVoidTy() && "must have result");
2986 new VPMultiDefValue(this, Inst, Inst->getType());
2987 }
2988 } else {
2989 for (auto *SV : StoredValues)
2990 addOperand(SV);
2991 }
2992 if (Mask) {
2993 HasMask = true;
2994 addOperand(Mask);
2995 }
2996 }
2997
2998public:
2999 VPInterleaveBase *clone() override = 0;
3000
3001 static inline bool classof(const VPRecipeBase *R) {
3002 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
3003 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
3004 }
3005
3006 static inline bool classof(const VPUser *U) {
3007 auto *R = dyn_cast<VPRecipeBase>(U);
3008 return R && classof(R);
3009 }
3010
3011 /// Return the address accessed by this recipe.
3012 VPValue *getAddr() const {
3013 return getOperand(0); // Address is the 1st, mandatory operand.
3014 }
3015
3016 /// Return the mask used by this recipe. Note that a full mask is represented
3017 /// by a nullptr.
3018 VPValue *getMask() const {
3019 // Mask is optional and the last operand.
3020 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
3021 }
3022
3023 /// Return true if the access needs a mask because of the gaps.
3024 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
3025
3027
3028 Instruction *getInsertPos() const { return IG->getInsertPos(); }
3029
3030 void execute(VPTransformState &State) override {
3031 llvm_unreachable("VPInterleaveBase should not be instantiated.");
3032 }
3033
3034 /// Return the cost of this recipe.
3035 InstructionCost computeCost(ElementCount VF,
3036 VPCostContext &Ctx) const override;
3037
3038 /// Returns true if the recipe only uses the first lane of operand \p Op.
3039 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
3040
3041 /// Returns the number of stored operands of this interleave group. Returns 0
3042 /// for load interleave groups.
3043 virtual unsigned getNumStoreOperands() const = 0;
3044
3045 /// Return the VPValues stored by this interleave group. If it is a load
3046 /// interleave group, return an empty ArrayRef.
3048 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
3050 }
3051};
3052
3053/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
3054/// or stores into one wide load/store and shuffles. The first operand of a
3055/// VPInterleave recipe is the address, followed by the stored values, followed
3056/// by an optional mask.
3058public:
3060 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3061 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3062 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
3063 Mask, NeedsMaskForGaps, MD, DL) {}
3064
3065 ~VPInterleaveRecipe() override = default;
3066
3070 needsMaskForGaps(), *this, getDebugLoc());
3071 }
3072
3073 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
3074
3075 /// Generate the wide load or store, and shuffles.
3076 void execute(VPTransformState &State) override;
3077
3078 bool usesFirstLaneOnly(const VPValue *Op) const override {
3080 "Op must be an operand of the recipe");
3081 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
3082 }
3083
3084 unsigned getNumStoreOperands() const override {
3085 return getNumOperands() - (getMask() ? 2 : 1);
3086 }
3087
3088protected:
3089#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3090 /// Print the recipe.
3091 void printRecipe(raw_ostream &O, const Twine &Indent,
3092 VPSlotTracker &SlotTracker) const override;
3093#endif
3094};
3095
3096/// A recipe for interleaved memory operations with vector-predication
3097/// intrinsics. The first operand is the address, the second operand is the
3098/// explicit vector length. Stored values and mask are optional operands.
3100public:
3102 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3103 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3104 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3105 R.getDebugLoc()) {
3106 assert(!getInterleaveGroup()->isReverse() &&
3107 "Reversed interleave-group with tail folding is not supported.");
3108 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3109 "supported for scalable vector.");
3110 }
3111
3112 ~VPInterleaveEVLRecipe() override = default;
3113
3115 llvm_unreachable("cloning not implemented yet");
3116 }
3117
3118 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3119
3120 /// The VPValue of the explicit vector length.
3121 VPValue *getEVL() const { return getOperand(1); }
3122
3123 /// Generate the wide load or store, and shuffles.
3124 void execute(VPTransformState &State) override;
3125
3126 /// The recipe only uses the first lane of the address, and EVL operand.
3127 bool usesFirstLaneOnly(const VPValue *Op) const override {
3129 "Op must be an operand of the recipe");
3130 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3131 Op == getEVL();
3132 }
3133
3134 unsigned getNumStoreOperands() const override {
3135 return getNumOperands() - (getMask() ? 3 : 2);
3136 }
3137
3138protected:
3139#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3140 /// Print the recipe.
3141 void printRecipe(raw_ostream &O, const Twine &Indent,
3142 VPSlotTracker &SlotTracker) const override;
3143#endif
3144};
3145
3146/// A recipe to represent inloop, ordered or partial reduction operations. It
3147/// performs a reduction on a vector operand into a scalar (vector in the case
3148/// of a partial reduction) value, and adds the result to a chain. The Operands
3149/// are {ChainOp, VecOp, [Condition]}.
3151
3152 /// The recurrence kind for the reduction in question.
3153 RecurKind RdxKind;
3154 /// Whether the reduction is conditional.
3155 bool IsConditional = false;
3156 ReductionStyle Style;
3157
3158protected:
3159 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3161 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3162 ReductionStyle Style, DebugLoc DL)
3163 : VPRecipeWithIRFlags(SC, Operands, FMFs, DL), RdxKind(RdxKind),
3164 Style(Style) {
3165 if (CondOp) {
3166 IsConditional = true;
3167 addOperand(CondOp);
3168 }
3170 }
3171
3172public:
3174 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3176 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3177 {ChainOp, VecOp}, CondOp, Style, DL) {}
3178
3180 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3182 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3183 {ChainOp, VecOp}, CondOp, Style, DL) {}
3184
3185 ~VPReductionRecipe() override = default;
3186
3188 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3190 getCondOp(), Style, getDebugLoc());
3191 }
3192
3193 static inline bool classof(const VPRecipeBase *R) {
3194 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3195 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3196 }
3197
3198 static inline bool classof(const VPUser *U) {
3199 auto *R = dyn_cast<VPRecipeBase>(U);
3200 return R && classof(R);
3201 }
3202
3203 static inline bool classof(const VPValue *VPV) {
3204 const VPRecipeBase *R = VPV->getDefiningRecipe();
3205 return R && classof(R);
3206 }
3207
3208 static inline bool classof(const VPSingleDefRecipe *R) {
3209 return classof(static_cast<const VPRecipeBase *>(R));
3210 }
3211
3212 /// Generate the reduction in the loop.
3213 void execute(VPTransformState &State) override;
3214
3215 /// Return the cost of VPReductionRecipe.
3216 InstructionCost computeCost(ElementCount VF,
3217 VPCostContext &Ctx) const override;
3218
3219 /// Return the recurrence kind for the in-loop reduction.
3220 RecurKind getRecurrenceKind() const { return RdxKind; }
3221 /// Return true if the in-loop reduction is ordered.
3222 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3223 /// Return true if the in-loop reduction is conditional.
3224 bool isConditional() const { return IsConditional; };
3225 /// Returns true if the reduction outputs a vector with a scaled down VF.
3226 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3227 /// Returns true if the reduction is in-loop.
3228 bool isInLoop() const {
3229 return std::holds_alternative<RdxInLoop>(Style) ||
3230 std::holds_alternative<RdxOrdered>(Style);
3231 }
3232 /// The VPValue of the scalar Chain being accumulated.
3233 VPValue *getChainOp() const { return getOperand(0); }
3234 /// The VPValue of the vector value to be reduced.
3235 VPValue *getVecOp() const { return getOperand(1); }
3236 /// The VPValue of the condition for the block.
3238 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3239 }
3240 /// Get the factor that the VF of this recipe's output should be scaled by, or
3241 /// 1 if it isn't scaled.
3242 unsigned getVFScaleFactor() const {
3243 auto *Partial = std::get_if<RdxUnordered>(&Style);
3244 return Partial ? Partial->VFScaleFactor : 1;
3245 }
3246
3247protected:
3248#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3249 /// Print the recipe.
3250 void printRecipe(raw_ostream &O, const Twine &Indent,
3251 VPSlotTracker &SlotTracker) const override;
3252#endif
3253};
3254
3255/// A recipe to represent inloop reduction operations with vector-predication
3256/// intrinsics, performing a reduction on a vector operand with the explicit
3257/// vector length (EVL) into a scalar value, and adding the result to a chain.
3258/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3260public:
3263 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3264 R.getFastMathFlags(),
3266 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3267 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3268 DL) {}
3269
3270 ~VPReductionEVLRecipe() override = default;
3271
3273 llvm_unreachable("cloning not implemented yet");
3274 }
3275
3276 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3277
3278 /// Generate the reduction in the loop
3279 void execute(VPTransformState &State) override;
3280
3281 /// The VPValue of the explicit vector length.
3282 VPValue *getEVL() const { return getOperand(2); }
3283
3284 /// Returns true if the recipe only uses the first lane of operand \p Op.
3285 bool usesFirstLaneOnly(const VPValue *Op) const override {
3287 "Op must be an operand of the recipe");
3288 return Op == getEVL();
3289 }
3290
3291protected:
3292#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3293 /// Print the recipe.
3294 void printRecipe(raw_ostream &O, const Twine &Indent,
3295 VPSlotTracker &SlotTracker) const override;
3296#endif
3297};
3298
3299/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3300/// copies of the original scalar type, one per lane, instead of producing a
3301/// single copy of widened type for all lanes. If the instruction is known to be
3302/// a single scalar, only one copy will be generated.
3304 public VPIRMetadata {
3305 /// Indicator if only a single scalar copy is needed, rather than one per lane.
3306 bool IsSingleScalar;
3307
3308 /// Indicator if the replicas are also predicated.
3309 bool IsPredicated;
3310
3311public:
3313 bool IsSingleScalar, VPValue *Mask = nullptr,
3314 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3315 DebugLoc DL = DebugLoc::getUnknown())
3316 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands, Flags, DL),
3317 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3318 IsPredicated(Mask) {
3319 setUnderlyingValue(I);
3320 if (Mask)
3321 addOperand(Mask);
3322 }
3323
3324 ~VPReplicateRecipe() override = default;
3325
3327 auto *Copy = new VPReplicateRecipe(
3328 getUnderlyingInstr(), operands(), IsSingleScalar,
3329 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3330 Copy->transferFlags(*this);
3331 return Copy;
3332 }
3333
3334 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3335
3336 /// Generate replicas of the desired Ingredient. Replicas will be generated
3337 /// for all parts and lanes unless a specific part and lane are specified in
3338 /// the \p State.
3339 void execute(VPTransformState &State) override;
3340
3341 /// Return the cost of this VPReplicateRecipe.
3342 InstructionCost computeCost(ElementCount VF,
3343 VPCostContext &Ctx) const override;
3344
3345 /// Return the cost of scalarizing a call to \p CalledFn with argument
3346 /// operands \p ArgOps for a given \p VF.
3347 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
3349 bool IsSingleScalar, ElementCount VF,
3350 VPCostContext &Ctx);
3351
3352 bool isSingleScalar() const { return IsSingleScalar; }
3353
3354 bool isPredicated() const { return IsPredicated; }
3355
3356 /// Returns true if the recipe only uses the first lane of operand \p Op.
3357 bool usesFirstLaneOnly(const VPValue *Op) const override {
3359 "Op must be an operand of the recipe");
3360 return isSingleScalar();
3361 }
3362
3363 /// Returns true if the recipe uses scalars of operand \p Op.
3364 bool usesScalars(const VPValue *Op) const override {
3366 "Op must be an operand of the recipe");
3367 return true;
3368 }
3369
3370 /// Return the mask of a predicated VPReplicateRecipe.
3372 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3373 return getOperand(getNumOperands() - 1);
3374 }
3375
3376 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3377
3378protected:
3379#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3380 /// Print the recipe.
3381 void printRecipe(raw_ostream &O, const Twine &Indent,
3382 VPSlotTracker &SlotTracker) const override;
3383#endif
3384};
3385
3386/// A recipe for generating conditional branches on the bits of a mask.
3388public:
3390 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3391
3394 }
3395
3396 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3397
3398 /// Generate the extraction of the appropriate bit from the block mask and the
3399 /// conditional branch.
3400 void execute(VPTransformState &State) override;
3401
3402 /// Return the cost of this VPBranchOnMaskRecipe.
3403 InstructionCost computeCost(ElementCount VF,
3404 VPCostContext &Ctx) const override;
3405
3406#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3407 /// Print the recipe.
3408 void printRecipe(raw_ostream &O, const Twine &Indent,
3409 VPSlotTracker &SlotTracker) const override {
3410 O << Indent << "BRANCH-ON-MASK ";
3412 }
3413#endif
3414
3415 /// Returns true if the recipe uses scalars of operand \p Op.
3416 bool usesScalars(const VPValue *Op) const override {
3418 "Op must be an operand of the recipe");
3419 return true;
3420 }
3421};
3422
3423/// A recipe to combine multiple recipes into a single 'expression' recipe,
3424/// which should be considered a single entity for cost-modeling and transforms.
3425/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3426/// expression recipes, before execute. The individual expression recipes are
3427/// completely disconnected from the def-use graph of other recipes not part of
3428/// the expression. Def-use edges between pairs of expression recipes remain
3429/// intact, whereas every edge between an expression recipe and a recipe outside
3430/// the expression is elevated to connect the non-expression recipe with the
3431/// VPExpressionRecipe itself.
3432class VPExpressionRecipe : public VPSingleDefRecipe {
3433 /// Recipes included in this VPExpressionRecipe. This could contain
3434 /// duplicates.
3435 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3436
3437 /// Temporary VPValues used for external operands of the expression, i.e.
3438 /// operands not defined by recipes in the expression.
3439 SmallVector<VPValue *> LiveInPlaceholders;
3440
3441 enum class ExpressionTypes {
3442 /// Represents an inloop extended reduction operation, performing a
3443 /// reduction on an extended vector operand into a scalar value, and adding
3444 /// the result to a chain.
3445 ExtendedReduction,
3446 /// Represent an inloop multiply-accumulate reduction, multiplying the
3447 /// extended vector operands, performing a reduction.add on the result, and
3448 /// adding the scalar result to a chain.
3449 ExtMulAccReduction,
3450 /// Represent an inloop multiply-accumulate reduction, multiplying the
3451 /// vector operands, performing a reduction.add on the result, and adding
3452 /// the scalar result to a chain.
3453 MulAccReduction,
3454 /// Represent an inloop multiply-accumulate reduction, multiplying the
3455 /// extended vector operands, negating the multiplication, performing a
3456 /// reduction.add on the result, and adding the scalar result to a chain.
3457 ExtNegatedMulAccReduction,
3458 };
3459
3460 /// Type of the expression.
3461 ExpressionTypes ExpressionType;
3462
3463 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3464 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3465 /// in the expression) are replaced by temporary VPValues and the original
3466 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3467 /// as needed (excluding last) to ensure they are only used by other recipes
3468 /// in the expression.
3469 VPExpressionRecipe(ExpressionTypes ExpressionType,
3470 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3471
3472public:
3474 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3476 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3479 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3480 {Ext0, Ext1, Mul, Red}) {}
3483 VPReductionRecipe *Red)
3484 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3485 {Ext0, Ext1, Mul, Sub, Red}) {
3486 assert(Mul->getOpcode() == Instruction::Mul && "Expected a mul");
3487 assert(Red->getRecurrenceKind() == RecurKind::Add &&
3488 "Expected an add reduction");
3489 assert(getNumOperands() >= 3 && "Expected at least three operands");
3490 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3491 assert(SubConst && SubConst->isZero() &&
3492 Sub->getOpcode() == Instruction::Sub && "Expected a negating sub");
3493 }
3494
3496 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3497 for (auto *R : reverse(ExpressionRecipes)) {
3498 if (ExpressionRecipesSeen.insert(R).second)
3499 delete R;
3500 }
3501 for (VPValue *T : LiveInPlaceholders)
3502 delete T;
3503 }
3504
3505 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3506
3507 VPExpressionRecipe *clone() override {
3508 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3509 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3510 for (auto *R : ExpressionRecipes)
3511 NewExpressiondRecipes.push_back(R->clone());
3512 for (auto *New : NewExpressiondRecipes) {
3513 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3514 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3515 // Update placeholder operands in the cloned recipe to use the external
3516 // operands, to be internalized when the cloned expression is constructed.
3517 for (const auto &[Placeholder, OutsideOp] :
3518 zip(LiveInPlaceholders, operands()))
3519 New->replaceUsesOfWith(Placeholder, OutsideOp);
3520 }
3521 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3522 }
3523
3524 /// Return the VPValue to use to infer the result type of the recipe.
3526 unsigned OpIdx =
3527 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3528 : 1;
3529 return getOperand(getNumOperands() - OpIdx);
3530 }
3531
3532 /// Insert the recipes of the expression back into the VPlan, directly before
3533 /// the current recipe. Leaves the expression recipe empty, which must be
3534 /// removed before codegen.
3535 void decompose();
3536
3537 unsigned getVFScaleFactor() const {
3538 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3539 return PR ? PR->getVFScaleFactor() : 1;
3540 }
3541
3542 /// Method for generating code, must not be called as this recipe is abstract.
3543 void execute(VPTransformState &State) override {
3544 llvm_unreachable("recipe must be removed before execute");
3545 }
3546
3548 VPCostContext &Ctx) const override;
3549
3550 /// Returns true if this expression contains recipes that may read from or
3551 /// write to memory.
3552 bool mayReadOrWriteMemory() const;
3553
3554 /// Returns true if this expression contains recipes that may have side
3555 /// effects.
3556 bool mayHaveSideEffects() const;
3557
3558 /// Returns true if the result of this VPExpressionRecipe is a single-scalar.
3559 bool isSingleScalar() const;
3560
3561protected:
3562#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3563 /// Print the recipe.
3564 void printRecipe(raw_ostream &O, const Twine &Indent,
3565 VPSlotTracker &SlotTracker) const override;
3566#endif
3567};
3568
3569/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3570/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3571/// order to merge values that are set under such a branch and feed their uses.
3572/// The phi nodes can be scalar or vector depending on the users of the value.
3573/// This recipe works in concert with VPBranchOnMaskRecipe.
3575public:
3576 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3577 /// nodes after merging back from a Branch-on-Mask.
3579 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV,
3580 getScalarTypeOrInfer(PredV), /*UV=*/nullptr, DL) {}
3581 ~VPPredInstPHIRecipe() override = default;
3582
3584 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3585 }
3586
3587 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3588
3589 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3590 /// retain SSA form.
3591 void execute(VPTransformState &State) override;
3592
3593 /// Return the cost of this VPPredInstPHIRecipe.
3595 VPCostContext &Ctx) const override {
3596 // TODO: Compute accurate cost after retiring the legacy cost model.
3597 return 0;
3598 }
3599
3600protected:
3601#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3602 /// Print the recipe.
3603 void printRecipe(raw_ostream &O, const Twine &Indent,
3604 VPSlotTracker &SlotTracker) const override;
3605#endif
3606};
3607
3608/// A common mixin class for widening memory operations. An optional mask can be
3609/// provided as the last operand.
3611protected:
3613
3614 /// Alignment information for this memory access.
3616
3617 /// Whether the accessed addresses are consecutive.
3619
3620 /// Whether the memory access is masked.
3621 bool IsMasked = false;
3622
3623 void setMask(VPValue *Mask) {
3624 assert(!IsMasked && "cannot re-set mask");
3625 if (!Mask)
3626 return;
3627 getAsRecipe()->addOperand(Mask);
3628 IsMasked = true;
3629 }
3630
3635
3636public:
3637 virtual ~VPWidenMemoryRecipe() = default;
3638
3639 /// Return a VPRecipeBase* to the current object.
3641 virtual const VPRecipeBase *getAsRecipe() const = 0;
3642
3643 /// Return whether the loaded-from / stored-to addresses are consecutive.
3644 bool isConsecutive() const { return Consecutive; }
3645
3646 /// Return the address accessed by this recipe.
3647 VPValue *getAddr() const { return getAsRecipe()->getOperand(0); }
3648
3649 /// Returns true if the recipe is masked.
3650 bool isMasked() const { return IsMasked; }
3651
3652 /// Return the mask used by this recipe. Note that a full mask is represented
3653 /// by a nullptr.
3654 VPValue *getMask() const {
3655 // Mask is optional and therefore the last operand.
3656 const VPRecipeBase *R = getAsRecipe();
3657 return isMasked() ? R->getOperand(R->getNumOperands() - 1) : nullptr;
3658 }
3659
3660 /// Returns the alignment of the memory access.
3661 Align getAlign() const { return Alignment; }
3662
3663 /// Return the cost of this VPWidenMemoryRecipe.
3664 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3665
3667};
3668
3669/// A recipe for widening load operations, using the address to load from and an
3670/// optional mask.
3672 public VPWidenMemoryRecipe {
3674 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3675 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, Load.getType(),
3676 &Load, DL),
3677 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3678 setMask(Mask);
3679 }
3680
3683 getMask(), Consecutive, *this, getDebugLoc());
3684 }
3685
3686 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3687
3688 /// Generate a wide load or gather.
3689 void execute(VPTransformState &State) override;
3690
3691 /// Return the cost of this VPWidenLoadRecipe.
3693 VPCostContext &Ctx) const override {
3694 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3695 }
3696
3697 /// Returns true if the recipe only uses the first lane of operand \p Op.
3698 bool usesFirstLaneOnly(const VPValue *Op) const override {
3700 "Op must be an operand of the recipe");
3701 // Widened, consecutive loads operations only demand the first lane of
3702 // their address.
3703 return Op == getAddr() && isConsecutive();
3704 }
3705
3706protected:
3707 VPRecipeBase *getAsRecipe() override { return this; }
3708 const VPRecipeBase *getAsRecipe() const override { return this; }
3709
3710#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3711 /// Print the recipe.
3712 void printRecipe(raw_ostream &O, const Twine &Indent,
3713 VPSlotTracker &SlotTracker) const override;
3714#endif
3715};
3716
3717/// A recipe for widening load operations with vector-predication intrinsics,
3718/// using the address to load from, the explicit vector length and an optional
3719/// mask.
3721 public VPWidenMemoryRecipe {
3723 VPValue *Mask)
3724 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3725 L.getIngredient().getType(), &L.getIngredient(),
3726 L.getDebugLoc()),
3727 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3728 setMask(Mask);
3729 }
3730
3732 llvm_unreachable("cloning not supported");
3733 }
3734
3735 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3736
3737 /// Return the EVL operand.
3738 VPValue *getEVL() const { return getOperand(1); }
3739
3740 /// Generate the wide load or gather.
3741 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3742
3743 /// Return the cost of this VPWidenLoadEVLRecipe.
3745 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3746
3747 /// Returns true if the recipe only uses the first lane of operand \p Op.
3748 bool usesFirstLaneOnly(const VPValue *Op) const override {
3750 "Op must be an operand of the recipe");
3751 // Widened loads only demand the first lane of EVL and consecutive loads
3752 // only demand the first lane of their address.
3753 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3754 }
3755
3756protected:
3757 VPRecipeBase *getAsRecipe() override { return this; }
3758 const VPRecipeBase *getAsRecipe() const override { return this; }
3759
3760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3761 /// Print the recipe.
3762 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3763 VPSlotTracker &SlotTracker) const override;
3764#endif
3765};
3766
3767/// A recipe for widening store operations, using the stored value, the address
3768/// to store to and an optional mask.
3770 public VPWidenMemoryRecipe {
3771 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3772 VPValue *Mask, bool Consecutive,
3773 const VPIRMetadata &Metadata, DebugLoc DL)
3774 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3775 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3776 setMask(Mask);
3777 }
3778
3782 *this, getDebugLoc());
3783 }
3784
3785 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3786
3787 /// Return the value stored by this recipe.
3788 VPValue *getStoredValue() const { return getOperand(1); }
3789
3790 /// Generate a wide store or scatter.
3791 void execute(VPTransformState &State) override;
3792
3793 /// Return the cost of this VPWidenStoreRecipe.
3795 VPCostContext &Ctx) const override {
3796 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3797 }
3798
3799 /// Returns true if the recipe only uses the first lane of operand \p Op.
3800 bool usesFirstLaneOnly(const VPValue *Op) const override {
3802 "Op must be an operand of the recipe");
3803 // Widened, consecutive stores only demand the first lane of their address,
3804 // unless the same operand is also stored.
3805 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3806 }
3807
3808protected:
3809 VPRecipeBase *getAsRecipe() override { return this; }
3810 const VPRecipeBase *getAsRecipe() const override { return this; }
3811
3812#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3813 /// Print the recipe.
3814 void printRecipe(raw_ostream &O, const Twine &Indent,
3815 VPSlotTracker &SlotTracker) const override;
3816#endif
3817};
3818
3819/// A recipe for widening store operations with vector-predication intrinsics,
3820/// using the value to store, the address to store to, the explicit vector
3821/// length and an optional mask.
3823 public VPWidenMemoryRecipe {
3825 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3826 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3827 S.getDebugLoc()),
3828 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3829 setMask(Mask);
3830 }
3831
3833 llvm_unreachable("cloning not supported");
3834 }
3835
3836 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3837
3838 /// Return the value stored by this recipe.
3839 VPValue *getStoredValue() const { return getOperand(1); }
3840
3841 /// Return the EVL operand.
3842 VPValue *getEVL() const { return getOperand(2); }
3843
3844 /// Generate the wide store or scatter.
3845 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3846
3847 /// Return the cost of this VPWidenStoreEVLRecipe.
3849 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3850
3851 /// Returns true if the recipe only uses the first lane of operand \p Op.
3852 bool usesFirstLaneOnly(const VPValue *Op) const override {
3854 "Op must be an operand of the recipe");
3855 if (Op == getEVL()) {
3856 assert(getStoredValue() != Op && "unexpected store of EVL");
3857 return true;
3858 }
3859 // Widened, consecutive memory operations only demand the first lane of
3860 // their address, unless the same operand is also stored. The latter can
3861 // happen with opaque pointers.
3862 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3863 }
3864
3865protected:
3866 VPRecipeBase *getAsRecipe() override { return this; }
3867 const VPRecipeBase *getAsRecipe() const override { return this; }
3868
3869#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3870 /// Print the recipe.
3871 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3872 VPSlotTracker &SlotTracker) const override;
3873#endif
3874};
3875
3876/// Recipe to expand a SCEV expression.
3878 const SCEV *Expr;
3879
3880public:
3881 VPExpandSCEVRecipe(const SCEV *Expr);
3882
3883 ~VPExpandSCEVRecipe() override = default;
3884
3885 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
3886
3887 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
3888
3889 void execute(VPTransformState &State) override {
3890 llvm_unreachable("SCEV expressions must be expanded before final execute");
3891 }
3892
3893 /// Return the cost of this VPExpandSCEVRecipe.
3895 VPCostContext &Ctx) const override {
3896 // TODO: Compute accurate cost after retiring the legacy cost model.
3897 return 0;
3898 }
3899
3900 const SCEV *getSCEV() const { return Expr; }
3901
3902protected:
3903#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3904 /// Print the recipe.
3905 void printRecipe(raw_ostream &O, const Twine &Indent,
3906 VPSlotTracker &SlotTracker) const override;
3907#endif
3908};
3909
3910/// A recipe for generating the active lane mask for the vector loop that is
3911/// used to predicate the vector operations.
3913public:
3915 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
3916 StartMask, DL) {}
3917
3918 ~VPActiveLaneMaskPHIRecipe() override = default;
3919
3922 if (getNumOperands() == 2)
3923 R->addOperand(getOperand(1));
3924 return R;
3925 }
3926
3927 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
3928
3929 /// Generate the active lane mask phi of the vector loop.
3930 void execute(VPTransformState &State) override;
3931
3932protected:
3933#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3934 /// Print the recipe.
3935 void printRecipe(raw_ostream &O, const Twine &Indent,
3936 VPSlotTracker &SlotTracker) const override;
3937#endif
3938};
3939
3940/// A recipe for generating the phi node tracking the current scalar iteration
3941/// index. It starts at the start value of the canonical induction and gets
3942/// incremented by the number of scalar iterations processed by the vector loop
3943/// iteration. The increment does not have to be loop invariant.
3945public:
3947 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
3948 StartIV, DL) {}
3949
3950 ~VPCurrentIterationPHIRecipe() override = default;
3951
3953 llvm_unreachable("cloning not implemented yet");
3954 }
3955
3956 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
3957
3958 void execute(VPTransformState &State) override {
3959 llvm_unreachable("cannot execute this recipe, should be replaced by a "
3960 "scalar phi recipe");
3961 }
3962
3963 /// Return the cost of this VPCurrentIterationPHIRecipe.
3965 VPCostContext &Ctx) const override {
3966 // For now, match the behavior of the legacy cost model.
3967 return 0;
3968 }
3969
3970 /// Returns true if the recipe only uses the first lane of operand \p Op.
3971 bool usesFirstLaneOnly(const VPValue *Op) const override {
3973 "Op must be an operand of the recipe");
3974 return true;
3975 }
3976
3977protected:
3978#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3979 /// Print the recipe.
3980 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3981 VPSlotTracker &SlotTracker) const override;
3982#endif
3983};
3984
3985/// A Recipe for widening the canonical induction variable of the vector loop.
3986/// First operand is the canonical IV recipe, a second step operand (VF * Part)
3987/// is added during unrolling.
3989public:
3991 const VPIRFlags::WrapFlagsTy &Flags = {false, false})
3992 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCanonicalIVSC, CanonicalIV,
3993 CanonicalIV->getType(), Flags) {}
3994
3995 ~VPWidenCanonicalIVRecipe() override = default;
3996
3998 auto *WideCanIV =
4000 if (VPValue *Step = getStepValue())
4001 WideCanIV->addOperand(Step);
4002 return WideCanIV;
4003 }
4004
4005 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
4006
4007 void execute(VPTransformState &State) override {
4008 llvm_unreachable("Expected prior expansion of WidenCanonicalIV recipes");
4009 }
4010
4011 /// Return the cost of this VPWidenCanonicalIVRecipe.
4013 VPCostContext &Ctx) const override {
4014 // TODO: Compute accurate cost after retiring the legacy cost model.
4015 return 0;
4016 }
4017
4018 /// Return the canonical IV being widened.
4022
4024 return getNumOperands() == 2 ? getOperand(1) : nullptr;
4025 }
4026
4027protected:
4028#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4029 /// Print the recipe.
4030 void printRecipe(raw_ostream &O, const Twine &Indent,
4031 VPSlotTracker &SlotTracker) const override;
4032#endif
4033};
4034
4035/// A recipe for converting the input value \p IV value to the corresponding
4036/// value of an IV with different start and step values, using Start + IV *
4037/// Step.
4039 /// Kind of the induction.
4041 /// If not nullptr, the floating point induction binary operator. Must be set
4042 /// for floating point inductions.
4043 const FPMathOperator *FPBinOp;
4044
4045public:
4047 VPValue *CanonicalIV, VPValue *Step)
4049 IndDesc.getKind(),
4050 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
4051 Start, CanonicalIV, Step) {}
4052
4054 const FPMathOperator *FPBinOp, VPIRValue *Start,
4055 VPValue *IV, VPValue *Step)
4056 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step},
4057 Start->getScalarType(), nullptr),
4058 Kind(Kind), FPBinOp(FPBinOp) {}
4059
4060 ~VPDerivedIVRecipe() override = default;
4061
4063 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
4064 getStepValue());
4065 }
4066
4067 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4068
4069 void execute(VPTransformState &State) override {
4070 llvm_unreachable("Expected prior expansion of this recipe");
4071 }
4072
4073 /// Return the cost of this VPDerivedIVRecipe.
4075 VPCostContext &Ctx) const override {
4076 // TODO: Compute accurate cost after retiring the legacy cost model.
4077 return 0;
4078 }
4079
4081 VPValue *getIndex() const { return getOperand(1); }
4082 VPValue *getStepValue() const { return getOperand(2); }
4083 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
4085
4086 /// Returns true if the recipe only uses the first lane of operand \p Op.
4087 bool usesFirstLaneOnly(const VPValue *Op) const override {
4089 "Op must be an operand of the recipe");
4090 return true;
4091 }
4092
4093protected:
4094#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4095 /// Print the recipe.
4096 void printRecipe(raw_ostream &O, const Twine &Indent,
4097 VPSlotTracker &SlotTracker) const override;
4098#endif
4099};
4100
4101/// A recipe for handling phi nodes of integer and floating-point inductions,
4102/// producing their scalar values. Before unrolling by UF the recipe represents
4103/// the VF*UF scalar values to be produced, or UF scalar values if only first
4104/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4105/// operand StartIndex to all unroll parts except part 0, as the recipe
4106/// represents the VF scalar values (this number of values is taken from
4107/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4109 Instruction::BinaryOps InductionOpcode;
4110
4111public:
4114 DebugLoc DL)
4115 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4116 getScalarTypeOrInfer(IV), FMFs, DL),
4117 InductionOpcode(Opcode) {}
4118
4120 VPValue *Step, VPValue *VF,
4123 IV, Step, VF, IndDesc.getInductionOpcode(),
4124 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4125 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4126 : FastMathFlags(),
4127 DL) {}
4128
4129 ~VPScalarIVStepsRecipe() override = default;
4130
4132 auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
4133 getOperand(2), InductionOpcode,
4135 if (VPValue *StartIndex = getStartIndex())
4136 NewR->setStartIndex(StartIndex);
4137 return NewR;
4138 }
4139
4140 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4141
4142 /// Generate the scalarized versions of the phi node as needed by their users.
4143 void execute(VPTransformState &State) override;
4144
4145 /// Return the cost of this VPScalarIVStepsRecipe.
4147 VPCostContext &Ctx) const override {
4148 // TODO: Compute accurate cost after retiring the legacy cost model.
4149 return 0;
4150 }
4151
4152 VPValue *getStepValue() const { return getOperand(1); }
4153
4154 /// Return the number of scalars to produce per unroll part, used to compute
4155 /// StartIndex during unrolling.
4156 VPValue *getVFValue() const { return getOperand(2); }
4157
4158 /// Return the StartIndex, or null if known to be zero, valid only after
4159 /// unrolling.
4161 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4162 }
4163
4164 /// Set or add the StartIndex operand.
4165 void setStartIndex(VPValue *StartIndex) {
4166 if (getNumOperands() == 4)
4167 setOperand(3, StartIndex);
4168 else
4169 addOperand(StartIndex);
4170 }
4171
4172 /// Returns true if the recipe only uses the first lane of operand \p Op.
4173 bool usesFirstLaneOnly(const VPValue *Op) const override {
4175 "Op must be an operand of the recipe");
4176 return true;
4177 }
4178
4179 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4180
4181protected:
4182#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4183 /// Print the recipe.
4184 void printRecipe(raw_ostream &O, const Twine &Indent,
4185 VPSlotTracker &SlotTracker) const override;
4186#endif
4187};
4188
4189/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4190/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4191/// VPIRMetadata).
4192namespace vpdetail {
4193template <typename VPMixin, typename... RecipeTys>
4195 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4196 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4197 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4198 "Each type in RecipeTys must derive from VPMixin");
4199
4200 /// Used by isa.
4201 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4202
4203 /// Used by cast.
4204 static VPMixin *doCast(VPRecipeBase *R) {
4205 VPMixin *Out = nullptr;
 // Fold over RecipeTys: attempt a dyn_cast of R to each listed recipe type in
 // order and assign the result to Out. The logical-or fold short-circuits, so
 // Out keeps the first successful cast and the remaining types are not tried.
4206 ((Out = dyn_cast<RecipeTys>(R)) || ...);
 // R matched none of the listed recipe types.
4207 assert(Out && "Illegal recipe for cast");
4208 return Out;
4209 }
4210 static VPMixin *castFailed() { return nullptr; }
4211};
4212} // namespace vpdetail
4213
4214/// Support casting from VPRecipeBase -> VPPhiAccessors.
4215template <>
4219
4220template <>
4225template <>
4227 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4228 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4229
4230/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4231template <>
4236template <>
4241
4242/// Support casting from VPRecipeBase -> VPIRMetadata.
4243template <>
4249
4250template <>
4255template <>
4257 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4258 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4259
4260/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4261/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4262/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4263class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4264 friend class VPlan;
4265
4266 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
4267 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4268 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4269 if (Recipe)
4270 appendRecipe(Recipe);
4271 }
4272
4273public:
4275
4276protected:
4277 /// The VPRecipes held in the order of output instructions to generate.
4279
4280 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4281 : VPBlockBase(BlockSC, Name.str()) {}
4282
4283public:
4284 ~VPBasicBlock() override {
 // Drain the recipe list one element at a time, always removing the last
 // recipe, until the list is empty.
 // NOTE(review): presumably back-to-front so that users are removed before
 // the recipes defining their operands — confirm.
4285 while (!Recipes.empty())
4286 Recipes.pop_back();
4287 }
4288
4289 /// Instruction iterators...
4294
4295 //===--------------------------------------------------------------------===//
4296 /// Recipe iterator methods
4297 ///
4298 inline iterator begin() { return Recipes.begin(); }
4299 inline const_iterator begin() const { return Recipes.begin(); }
4300 inline iterator end() { return Recipes.end(); }
4301 inline const_iterator end() const { return Recipes.end(); }
4302
4303 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4304 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4305 inline reverse_iterator rend() { return Recipes.rend(); }
4306 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4307
4308 inline size_t size() const { return Recipes.size(); }
4309 inline bool empty() const { return Recipes.empty(); }
4310 inline const VPRecipeBase &front() const { return Recipes.front(); }
4311 inline VPRecipeBase &front() { return Recipes.front(); }
4312 inline const VPRecipeBase &back() const { return Recipes.back(); }
4313 inline VPRecipeBase &back() { return Recipes.back(); }
4314
4315 /// Returns a reference to the list of recipes.
4317
4318 /// Returns a pointer to a member of the recipe list.
4319 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4320 return &VPBasicBlock::Recipes;
4321 }
4322
4323 /// Method to support type inquiry through isa, cast, and dyn_cast.
4324 static inline bool classof(const VPBlockBase *V) {
4325 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4326 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4327 }
4328
4329 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
 // Link \p Recipe into this block's recipe list at \p InsertPt.
 // Preconditions: \p Recipe is non-null and does not have a parent block yet.
4330 assert(Recipe && "No recipe to append.");
4331 assert(!Recipe->Parent && "Recipe already in VPlan");
 // Record this block as the recipe's parent, then add it to the list.
4332 Recipe->Parent = this;
4333 Recipes.insert(InsertPt, Recipe);
4334 }
4335
4336 /// Augment the existing recipes of a VPBasicBlock with an additional
4337 /// \p Recipe as the last recipe.
4338 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4339
4340 /// The method which generates the output IR instructions that correspond to
4341 /// this VPBasicBlock, thereby "executing" the VPlan.
4342 void execute(VPTransformState *State) override;
4343
4344 /// Return the cost of this VPBasicBlock.
4345 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4346
4347 /// Return the position of the first non-phi node recipe in the block.
4348 iterator getFirstNonPhi();
4349
4350 /// Returns an iterator range over the PHI-like recipes in the block.
4354
4355 /// Split current block at \p SplitAt by inserting a new block between the
4356 /// current block and its successors and moving all recipes starting at
4357 /// SplitAt to the new block. Returns the new block.
4358 VPBasicBlock *splitAt(iterator SplitAt);
4359
4360 VPRegionBlock *getEnclosingLoopRegion();
4361 const VPRegionBlock *getEnclosingLoopRegion() const;
4362
4363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4364 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4365 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4366 ///
4367 /// Note that the numbering is applied to the whole VPlan, so printing
4368 /// individual blocks is consistent with the whole VPlan printing.
4369 void print(raw_ostream &O, const Twine &Indent,
4370 VPSlotTracker &SlotTracker) const override;
4371 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4372#endif
4373
4374 /// If the block has multiple successors, return the branch recipe terminating
4375 /// the block. If there are no or only a single successor, return nullptr;
4376 VPRecipeBase *getTerminator();
4377 const VPRecipeBase *getTerminator() const;
4378
4379 /// Returns true if the block is exiting its parent region.
4380 bool isExiting() const;
4381
4382 /// Clone the current block and its recipes, without updating the operands of
4383 /// the cloned recipes.
4384 VPBasicBlock *clone() override;
4385
4386 /// Returns the predecessor block at index \p Idx with the predecessors as per
4387 /// the corresponding plain CFG. If the block is an entry block to a region,
4388 /// the first predecessor is the single predecessor of a region, and the
4389 /// second predecessor is the exiting block of the region.
4390 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4391
4392protected:
4393 /// Execute the recipes in the IR basic block \p BB.
4394 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4395
4396 /// Connect the predecessors of this VPBB in the VPlan CFG to the IR basic block
4397 /// generated for this VPBB.
4398 void connectToPredecessors(VPTransformState &State);
4399
4400private:
4401 /// Create an IR BasicBlock to hold the output instructions generated by this
4402 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4403 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4404};
4405
4406inline const VPBasicBlock *
4408 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4409}
4410
4411/// A special type of VPBasicBlock that wraps an existing IR basic block.
4412/// Recipes of the block get added before the first non-phi instruction in the
4413/// wrapped block.
4414/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4415/// preheader block.
4416class VPIRBasicBlock : public VPBasicBlock {
4417 friend class VPlan;
4418
 /// The IR basic block wrapped by this VPIRBasicBlock.
4419 BasicBlock *IRBB;
4420
4421 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
 /// The block is tagged with VPIRBasicBlockSC and named "ir-bb<NAME>", where
 /// NAME is the name of \p IRBB. \p IRBB is dereferenced here, so it must not
 /// be null.
4422 VPIRBasicBlock(BasicBlock *IRBB)
4423 : VPBasicBlock(VPIRBasicBlockSC,
4424 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4425 IRBB(IRBB) {}
4426
4427public:
4428 ~VPIRBasicBlock() override = default;
4429
 /// Method to support type inquiry through isa, cast, and dyn_cast. Matches
 /// only blocks whose block ID is VPIRBasicBlockSC.
4430 static inline bool classof(const VPBlockBase *V) {
4431 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4432 }
4433
4434 /// The method which generates the output IR instructions that correspond to
4435 /// this VPBasicBlock, thereby "executing" the VPlan.
4436 void execute(VPTransformState *State) override;
4437
 /// Clone this block; overrides the base-class clone() with a covariant
 /// VPIRBasicBlock return type. Defined out of line.
4438 VPIRBasicBlock *clone() override;
4439
 /// Return the wrapped IR basic block.
4440 BasicBlock *getIRBasicBlock() const { return IRBB; }
4441};
4442
4443/// Track information about the canonical IV value of a region.
4444/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4446 /// VPRegionValue for the canonical IV, whose allocation is managed by
4447 /// VPCanonicalIVInfo.
4448 std::unique_ptr<VPRegionValue> CanIV;
4449
4450 /// Whether the increment of the canonical IV may unsigned wrap or not.
4451 bool HasNUW = true;
4452
4453public:
4455 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4456
4457 VPRegionValue *getRegionValue() { return CanIV.get(); }
4458 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4459
4460 bool hasNUW() const { return HasNUW; }
4461
4462 void clearNUW() { HasNUW = false; }
4463};
4464
4465/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4466/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4467/// A VPRegionBlock may indicate that its contents are to be replicated several
4468/// times. This is designed to support predicated scalarization, in which a
4469/// scalar if-then code structure needs to be generated VF * UF times. Having
4470/// this replication indicator helps to keep a single model for multiple
4471/// candidate VF's. The actual replication takes place only once the desired VF
4472/// and UF have been determined.
4473class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4474 friend class VPlan;
4475
4476 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4477 VPBlockBase *Entry;
4478
4479 /// Hold the Single Exiting block of the SESE region modelled by the
4480 /// VPRegionBlock.
4481 VPBlockBase *Exiting;
4482
4483 /// Holds the Canonical IV of the loop region along with additional
4484 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4485 /// Loop regions retain their canonical IVs until they are dissolved, even if
4486 /// the canonical IV has no users.
4487 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4488
4489 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4490 /// VPRegionBlocks.
4491 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4492 const std::string &Name = "")
4493 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
 // This constructor does not set CanIVInfo, so the resulting region reports
 // isReplicator() == true unless the delegating loop-region constructor
 // fills it in afterwards.
 // A null Entry is accepted; in that case no checks or re-parenting happen
 // here.
4494 if (Entry) {
 // A region is single-entry/single-exiting: the entry must not have
 // predecessors and the exiting block must not have successors.
4495 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4496 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4497 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
 // Make this region the parent of both boundary blocks.
4498 Entry->setParent(this);
4499 Exiting->setParent(this);
4500 }
4501 }
4502
4503 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4504 VPBlockBase *Exiting, const std::string &Name = "")
4505 : VPRegionBlock(Entry, Exiting, Name) {
4506 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4507 }
4508
4509public:
4510 ~VPRegionBlock() override = default;
4511
4512 /// Method to support type inquiry through isa, cast, and dyn_cast.
4513 static inline bool classof(const VPBlockBase *V) {
4514 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4515 }
4516
4517 const VPBlockBase *getEntry() const { return Entry; }
4518 VPBlockBase *getEntry() { return Entry; }
4519
4520 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4521 /// EntryBlock must have no predecessors.
4522 void setEntry(VPBlockBase *EntryBlock) {
4523 assert(!EntryBlock->hasPredecessors() &&
4524 "Entry block cannot have predecessors.");
4525 Entry = EntryBlock;
4526 EntryBlock->setParent(this);
4527 }
4528
4529 const VPBlockBase *getExiting() const { return Exiting; }
4530 VPBlockBase *getExiting() { return Exiting; }
4531
4532 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4533 /// ExitingBlock must have no successors.
4534 void setExiting(VPBlockBase *ExitingBlock) {
4535 assert(!ExitingBlock->hasSuccessors() &&
4536 "Exit block cannot have successors.");
4537 Exiting = ExitingBlock;
4538 ExitingBlock->setParent(this);
4539 }
4540
4541 /// Returns the pre-header VPBasicBlock of the loop region.
4543 assert(!isReplicator() && "should only get pre-header of loop regions");
4544 return getSinglePredecessor()->getExitingBasicBlock();
4545 }
4546
4547 /// An indicator whether this region is to generate multiple replicated
4548 /// instances of output IR corresponding to its VPBlockBases.
4549 bool isReplicator() const { return !CanIVInfo; }
4550
4551 /// The method which generates the output IR instructions that correspond to
4552 /// this VPRegionBlock, thereby "executing" the VPlan.
4553 void execute(VPTransformState *State) override;
4554
4555 /// Return the cost of this region.
4556 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4557
4558#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4559 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4560 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4561 /// consecutive numbers.
4562 ///
4563 /// Note that the numbering is applied to the whole VPlan, so printing
4564 /// individual regions is consistent with the whole VPlan printing.
4565 void print(raw_ostream &O, const Twine &Indent,
4566 VPSlotTracker &SlotTracker) const override;
4567 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4568#endif
4569
4570 /// Clone all blocks in the single-entry single-exit region of the block and
4571 /// their recipes without updating the operands of the cloned recipes.
4572 VPRegionBlock *clone() override;
4573
4574 /// Remove the current region from its VPlan, connecting its predecessor to
4575 /// its entry, and its exiting block to its successor.
4576 void dissolveToCFGLoop();
4577
4578 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4579 /// a new increment before the terminator and return it. The canonical IV
4580 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4581 VPInstruction *getOrCreateCanonicalIVIncrement();
4582
4583 /// Return the canonical induction variable of the region, null for
4584 /// replicating regions.
4586 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4587 }
4589 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4590 }
4591
4592 /// Return the type of the canonical IV for loop regions.
4594 return CanIVInfo->getRegionValue()->getType();
4595 }
4596
4597 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
4598 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4599
4600 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
4602 assert(Increment && "Must provide increment to clear");
4603 Increment->dropPoisonGeneratingFlags();
4604 CanIVInfo->clearNUW();
4605 }
4606};
4607
4609 return getParent()->getParent();
4610}
4611
4613 return getParent()->getParent();
4614}
4615
4616/// VPlan models a candidate for vectorization, encoding various decisions taken
4617/// to produce efficient output IR, including which branches, basic-blocks and
4618/// output IR instructions to generate, and their cost. VPlan holds a
4619/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4620/// VPBasicBlock.
4621class VPlan {
4622 friend class VPlanPrinter;
4623 friend class VPSlotTracker;
4624
4625 /// VPBasicBlock corresponding to the original preheader. Used to place
4626 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4627 /// rest of VPlan execution.
4628 /// When this VPlan is used for the epilogue vector loop, the entry will be
4629 /// replaced by a new entry block created during skeleton creation.
4630 VPBasicBlock *Entry;
4631
4632 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4633 VPIRBasicBlock *ScalarHeader;
4634
4635 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4636 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4637 /// e.g. if the scalar epilogue always executes.
4639
4640 /// Holds the VFs applicable to this VPlan.
4642
4643 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4644 /// any UF.
4646
4647 /// Holds the name of the VPlan, for printing.
4648 std::string Name;
4649
4650 /// Represents the trip count of the original loop, for folding
4651 /// the tail.
4652 VPValue *TripCount = nullptr;
4653
4654 /// Represents the backedge taken count of the original loop, for folding
4655 /// the tail. It equals TripCount - 1.
4656 VPSymbolicValue *BackedgeTakenCount = nullptr;
4657
4658 /// Represents the vector trip count.
4659 VPSymbolicValue VectorTripCount;
4660
4661 /// Represents the vectorization factor of the loop.
4662 VPSymbolicValue VF;
4663
4664 /// Represents the unroll factor of the loop.
4665 VPSymbolicValue UF;
4666
4667 /// Represents the loop-invariant VF * UF of the vector loop region.
4668 VPSymbolicValue VFxUF;
4669
4670 /// Contains all the external definitions created for this VPlan, as a mapping
4671 /// from IR Values to VPIRValues.
4673
4674 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4675 /// VPlan is destroyed.
4676 SmallVector<VPBlockBase *> CreatedBlocks;
4677
4678 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4679 /// wrapping the original header of the scalar loop. The vector loop will have
4680 /// index type \p IdxTy.
4681 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4682 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4683 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4684 Entry->setPlan(this);
4685 assert(ScalarHeader->getNumSuccessors() == 0 &&
4686 "scalar header must be a leaf node");
4687 }
4688
4689public:
4690 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4691 /// original preheader and scalar header of \p L, to be used as entry and
4692 /// scalar header blocks of the new VPlan. The vector loop will have index
4693 /// type \p IdxTy.
4694 VPlan(Loop *L, Type *IdxTy);
4695
4696 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4697 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4698 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4699 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4700 setEntry(createVPBasicBlock("preheader"));
4701 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4702 }
4703
4705
4707 Entry = VPBB;
4708 VPBB->setPlan(this);
4709 }
4710
4711 /// Generate the IR code for this VPlan.
4712 void execute(VPTransformState *State);
4713
4714 /// Return the cost of this plan.
4716
4717 VPBasicBlock *getEntry() { return Entry; }
4718 const VPBasicBlock *getEntry() const { return Entry; }
4719
4720 /// Returns the preheader of the vector loop region, if one exists, or null
4721 /// otherwise.
4723 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4724 return VectorRegion
4725 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4726 : nullptr;
4727 }
4728
4729 /// Returns the VPRegionBlock of the vector loop.
4732
4733 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4734 /// loop region contains a nested loop region.
4735 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4736
4737 /// Returns the 'middle' block of the plan, that is the block that selects
4738 /// whether to execute the scalar tail loop or the exit block from the loop
4739 /// latch. If there is an early exit from the vector loop, the middle block
4740 /// conceptually has the early exit block as third successor, split across 2
4741 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4742 /// tail loop or the exit block. If the scalar tail loop or exit block are
4743 /// known to always execute, the middle block may branch directly to that
4744 /// block. This function cannot be called once the vector loop region has been
4745 /// removed.
4747 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4748 assert(
4749 LoopRegion &&
4750 "cannot call the function after vector loop region has been removed");
4751 // The middle block is always the last successor of the region.
4752 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4753 }
4754
4756 return const_cast<VPlan *>(this)->getMiddleBlock();
4757 }
4758
4759 /// Return the VPBasicBlock for the preheader of the scalar loop.
4762 getScalarHeader()->getSinglePredecessor());
4763 }
4764
4765 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4766 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4767
4768 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4769 /// the original scalar loop.
4770 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4771
4772 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4773 /// exit block.
4775
4776 /// Returns true if \p VPBB is an exit block.
4777 bool isExitBlock(VPBlockBase *VPBB);
4778
4779 /// The trip count of the original loop.
4781 assert(TripCount && "trip count needs to be set before accessing it");
4782 return TripCount;
4783 }
4784
4785 /// Set the trip count assuming it is currently null; if it is not - use
4786 /// resetTripCount().
4787 void setTripCount(VPValue *NewTripCount) {
4788 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4789 TripCount = NewTripCount;
4790 }
4791
4792 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4793 /// the original trip count have been replaced.
4794 void resetTripCount(VPValue *NewTripCount) {
4795 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4796 "TripCount must be set when resetting");
4797 TripCount = NewTripCount;
4798 }
4799
4800 /// The backedge taken count of the original loop.
4802 // BTC shares the canonical IV type with VectorTripCount.
4803 if (!BackedgeTakenCount)
4804 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4805 return BackedgeTakenCount;
4806 }
4807 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4808
4809 /// The vector trip count.
4810 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4811
4812 /// Returns the VF of the vector loop region.
4813 VPSymbolicValue &getVF() { return VF; };
4814 const VPSymbolicValue &getVF() const { return VF; };
4815
4816 /// Returns the UF of the vector loop region.
4817 VPSymbolicValue &getUF() { return UF; };
4818
4819 /// Returns VF * UF of the vector loop region.
4820 VPSymbolicValue &getVFxUF() { return VFxUF; }
4821
4824 }
4825
4826 const DataLayout &getDataLayout() const {
4828 }
4829
4830 void addVF(ElementCount VF) { VFs.insert(VF); }
4831
4833 assert(hasVF(VF) && "Cannot set VF not already in plan");
4834 VFs.clear();
4835 VFs.insert(VF);
4836 }
4837
4838 /// Remove \p VF from the plan.
4840 assert(hasVF(VF) && "tried to remove VF not present in plan");
4841 VFs.remove(VF);
4842 }
4843
4844 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4845 bool hasScalableVF() const {
4846 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4847 }
4848
4849 /// Returns an iterator range over all VFs of the plan.
4852 return VFs;
4853 }
4854
4855 /// Returns the single VF of the plan, asserting that the plan has exactly
4856 /// one VF.
4858 assert(VFs.size() == 1 && "expected plan with single VF");
4859 return VFs[0];
4860 }
4861
4862 bool hasScalarVFOnly() const {
4863 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
4864 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
4865 "Plan with scalar VF should only have a single VF");
4866 return HasScalarVFOnly;
4867 }
4868
4869 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4870
4871 /// Returns the concrete UF of the plan, after unrolling.
4872 unsigned getConcreteUF() const {
4873 assert(UFs.size() == 1 && "Expected a single UF");
4874 return UFs[0];
4875 }
4876
4877 void setUF(unsigned UF) {
4878 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4879 UFs.clear();
4880 UFs.insert(UF);
4881 }
4882
4883 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
4884 /// concrete UF.
4885 bool isUnrolled() const { return UFs.size() == 1; }
4886
4887 /// Return a string with the name of the plan and the applicable VFs and UFs.
4888 std::string getName() const;
4889
4890 void setName(const Twine &newName) { Name = newName.str(); }
4891
4892 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
4893 /// yet) for \p V.
4895 assert(V && "Trying to get or add the VPIRValue of a null Value");
4896 auto [It, Inserted] = LiveIns.try_emplace(V);
4897 if (Inserted) {
4898 if (auto *CI = dyn_cast<ConstantInt>(V))
4899 It->second = new VPConstantInt(CI);
4900 else
4901 It->second = new VPIRValue(V);
4902 }
4903
4904 assert(isa<VPIRValue>(It->second) &&
4905 "Only VPIRValues should be in mapping");
4906 return It->second;
4907 }
4909 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
4910 return getOrAddLiveIn(V->getValue());
4911 }
4912
4913 /// Return a VPIRValue wrapping i1 true.
4914 VPIRValue *getTrue() { return getConstantInt(1, 1); }
4915
4916 /// Return a VPIRValue wrapping i1 false.
4917 VPIRValue *getFalse() { return getConstantInt(1, 0); }
4918
4919 /// Return a VPIRValue wrapping the null value of type \p Ty.
4920 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
4921
4922 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
4924 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
4925 }
4926
4927 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
4928 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
4929 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
4930 }
4931
4932 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
4933 /// value.
4935 bool IsSigned = false) {
4936 return getConstantInt(APInt(BitWidth, Val, IsSigned));
4937 }
4938
4939 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
4941 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
4942 }
4943
4944 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
4945 /// otherwise.
4946 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
4947
4948 /// Return the list of live-in VPValues available in the VPlan.
4949 auto getLiveIns() const { return LiveIns.values(); }
4950
4951#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4952 /// Print the live-ins of this VPlan to \p O.
4953 void printLiveIns(raw_ostream &O) const;
4954
4955 /// Print this VPlan to \p O.
4956 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
4957
4958 /// Print this VPlan in DOT format to \p O.
4959 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
4960
4961 /// Dump the plan to stderr (for debugging).
4962 LLVM_DUMP_METHOD void dump() const;
4963#endif
4964
4965 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4966 /// recipes to refer to the clones, and return it.
4968
4969 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4970 /// present. The returned block is owned by the VPlan and deleted once the
4971 /// VPlan is destroyed.
4973 VPRecipeBase *Recipe = nullptr) {
4974 auto *VPB = new VPBasicBlock(Name, Recipe);
4975 CreatedBlocks.push_back(VPB);
4976 return VPB;
4977 }
4978
4979 /// Create a new loop region with a canonical IV using \p CanIVTy and
4980 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
4981 /// to \p Entry and \p Exiting respectively, if provided. The returned block
4982 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4984 const std::string &Name = "",
4985 VPBlockBase *Entry = nullptr,
4986 VPBlockBase *Exiting = nullptr) {
4987 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
4988 CreatedBlocks.push_back(VPB);
4989 return VPB;
4990 }
4991
4992 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
4993 /// returned block is owned by the VPlan and deleted once the VPlan is
4994 /// destroyed.
4996 const std::string &Name = "") {
4997 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
4998 CreatedBlocks.push_back(VPB);
4999 return VPB;
5000 }
5001
5002 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
5003 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
5004 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
5006
5007 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
5008 /// instructions in \p IRBB, except its terminator which is managed by the
5009 /// successors of the block in VPlan. The returned block is owned by the VPlan
5010 /// and deleted once the VPlan is destroyed.
5012
5013 /// Returns true if the VPlan is based on a loop with an early exit. That is
5014 /// the case if the VPlan has either more than one exit block or a single exit
5015 /// block with multiple predecessors (one for the exit via the latch and one
5016 /// via the other early exit).
5017 bool hasEarlyExit() const {
5018 return count_if(ExitBlocks,
5019 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
5020 1 ||
5021 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
5022 }
5023
5024 /// Returns true if the scalar tail may execute after the vector loop, i.e.
5025 /// if the middle block is a predecessor of the scalar preheader. Note that
5026 /// this relies on unneeded branches to the scalar tail loop being removed.
5027 bool hasScalarTail() const {
5028 auto *ScalarPH = getScalarPreheader();
5029 return ScalarPH &&
5030 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
5031 }
5032
5033 /// The type of the canonical induction variable of the vector loop.
5034 Type *getIndexType() const { return VF.getType(); }
5035};
5036
5037#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5038inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
5039 Plan.print(OS);
5040 return OS;
5041}
5042#endif
5043
5044} // end namespace llvm
5045
5046#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:584
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1080
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3920
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:3914
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4263
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4291
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4338
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4293
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4290
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4316
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4274
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4280
iterator end()
Definition VPlan.h:4300
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4298
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4292
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4351
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:785
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
~VPBasicBlock() override
Definition VPlan.h:4284
const_reverse_iterator rbegin() const
Definition VPlan.h:4304
reverse_iterator rend()
Definition VPlan.h:4305
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4278
VPRecipeBase & back()
Definition VPlan.h:4313
const VPRecipeBase & front() const
Definition VPlan.h:4310
const_iterator begin() const
Definition VPlan.h:4299
VPRecipeBase & front()
Definition VPlan.h:4311
const VPRecipeBase & back() const
Definition VPlan.h:4312
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4329
bool empty() const
Definition VPlan.h:4309
const_iterator end() const
Definition VPlan.h:4301
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4324
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4319
reverse_iterator rbegin()
Definition VPlan.h:4303
friend class VPlan
Definition VPlan.h:4264
size_t size() const
Definition VPlan.h:4308
const_reverse_iterator rend() const
Definition VPlan.h:4306
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:2920
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:2925
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2893
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:2915
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:2936
VPBlendRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:2902
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2900
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:2931
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2911
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:93
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:314
VPRegionBlock * getParent()
Definition VPlan.h:185
VPBlocksTy & getPredecessors()
Definition VPlan.h:222
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:219
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:384
void setName(const Twine &newName)
Definition VPlan.h:178
size_t getNumSuccessors() const
Definition VPlan.h:236
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:218
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:216
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:336
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:683
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:172
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:272
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:349
size_t getNumPredecessors() const
Definition VPlan.h:237
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:305
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:258
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:342
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:214
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:221
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:170
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:230
const VPRegionBlock * getParent() const
Definition VPlan.h:186
const std::string & getName() const
Definition VPlan.h:176
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:324
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:262
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:296
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:232
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:256
void clearPredecessors()
Remove all the predecessor of this block.
Definition VPlan.h:321
friend class VPBlockUtils
Definition VPlan.h:94
unsigned getVPBlockID() const
Definition VPlan.h:183
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:363
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:328
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:162
VPBlocksTy & getSuccessors()
Definition VPlan.h:211
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:250
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:285
void setParent(VPRegionBlock *P)
Definition VPlan.h:196
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:278
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:226
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:210
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3408
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3392
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3416
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3389
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4457
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4454
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4458
bool hasNUW() const
Definition VPlan.h:4460
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3952
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:3946
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:3964
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:3958
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3971
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:4084
VPValue * getIndex() const
Definition VPlan.h:4081
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:4083
VPIRValue * getStartValue() const
Definition VPlan.h:4080
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlan.h:4074
VPValue * getStepValue() const
Definition VPlan.h:4082
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4069
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4062
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:4053
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4087
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:4046
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3889
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:3894
VPExpandSCEVRecipe(const SCEV *Expr)
const SCEV * getSCEV() const
Definition VPlan.h:3900
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3885
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3543
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3525
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3507
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3495
bool isSingleScalar() const
Returns true if the result of this VPExpressionRecipe is a single-scalar.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Sub, VPReductionRecipe *Red)
Definition VPlan.h:3481
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3473
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3477
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3537
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3475
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2395
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2397
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2406
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2419
static bool classof(const VPValue *V)
Definition VPlan.h:2416
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2442
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2402
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2447
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2431
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2439
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2412
VPValue * getStartValue() const
Definition VPlan.h:2434
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2451
~VPHeaderPHIRecipe() override=default
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2145
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2162
unsigned getOpcode() const
Definition VPlan.h:2158
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2138
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4416
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:495
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4440
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4430
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4417
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:520
Class to record and manage LLVM IR flags.
Definition VPlan.h:696
FastMathFlagsTy FMFs
Definition VPlan.h:784
ReductionFlagsTy ReductionFlags
Definition VPlan.h:786
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1039
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:877
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:857
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:843
WrapFlagsTy WrapFlags
Definition VPlan.h:778
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:836
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:1001
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1065
TruncFlagsTy TruncFlags
Definition VPlan.h:779
CmpInst::Predicate getPredicate() const
Definition VPlan.h:973
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1049
uint8_t AllFlags[2]
Definition VPlan.h:787
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1009
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:882
ExactFlagsTy ExactFlags
Definition VPlan.h:781
bool hasNoSignedWrap() const
Definition VPlan.h:1028
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1053
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:848
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:853
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:862
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:831
uint8_t GEPFlagsStorage
Definition VPlan.h:782
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:867
bool isNonNeg() const
Definition VPlan.h:1011
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:991
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:996
DisjointFlagsTy DisjointFlags
Definition VPlan.h:780
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:981
bool hasNoUnsignedWrap() const
Definition VPlan.h:1017
FCmpFlagsTy FCmpFlags
Definition VPlan.h:785
NonNegFlagsTy NonNegFlags
Definition VPlan.h:783
bool isReductionInLoop() const
Definition VPlan.h:1071
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:893
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:930
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:872
uint8_t CmpPredStorage
Definition VPlan.h:777
RecurKind getRecurKind() const
Definition VPlan.h:1059
VPIRFlags(Instruction &I)
Definition VPlan.h:793
Instruction & getInstruction() const
Definition VPlan.h:1725
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1733
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1712
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1739
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1727
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1700
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1172
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1208
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1180
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1192
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1529
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1572
static bool classof(const VPUser *R)
Definition VPlan.h:1557
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1537
Type * getResultType() const
Definition VPlan.h:1578
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1561
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1227
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1461
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1481
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1400
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1325
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1316
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1329
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1341
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1319
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1267
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1312
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1262
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
Definition VPlan.h:1259
@ VScale
Returns the value for vscale.
Definition VPlan.h:1345
@ CanonicalIVIncrementForPart
Definition VPlan.h:1243
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1270
bool hasResult() const
Definition VPlan.h:1426
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1484
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1466
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1506
unsigned getOpcode() const
Definition VPlan.h:1410
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1509
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1475
VPInstruction * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:1402
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1451
A common base class for interleaved memory operations.
Definition VPlan.h:2961
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:3024
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3030
static bool classof(const VPUser *U)
Definition VPlan.h:3006
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:2973
Instruction * getInsertPos() const
Definition VPlan.h:3028
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3001
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:3026
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3018
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:3047
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3012
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3127
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3121
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3134
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3114
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3101
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition VPlan.h:3057
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3084
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3067
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3078
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3059
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
A VPRecipeValue defined by a multi-def recipe, stores a pointer to it.
Definition VPlanValue.h:364
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1590
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1619
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1614
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4407
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1639
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1599
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1624
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1628
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3583
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3594
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3578
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:401
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:549
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4608
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenMemIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:419
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:557
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:476
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:554
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:525
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:403
const VPBasicBlock * getParent() const
Definition VPlan.h:477
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:530
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:522
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:466
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:337
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3282
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3261
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3285
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3272
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2854
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2840
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2822
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2833
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true if the phi is part of a multi-use reduction.
Definition VPlan.h:2866
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2848
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2803
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2857
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2871
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPReductionPHIRecipe * cloneWithOperands(VPValue *Start, VPValue *BackedgeValue)
Definition VPlan.h:2815
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2863
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2851
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3150
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3159
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3224
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3193
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3208
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3235
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3237
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3220
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3173
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3222
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3179
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3226
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3233
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3228
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3187
static bool classof(const VPUser *U)
Definition VPlan.h:3198
static bool classof(const VPValue *VPV)
Definition VPlan.h:3203
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3242
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4473
const VPBlockBase * getEntry() const
Definition VPlan.h:4517
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4549
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4534
VPBlockBase * getExiting()
Definition VPlan.h:4530
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4588
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4522
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4593
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4598
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4601
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4585
const VPBlockBase * getExiting() const
Definition VPlan.h:4529
VPBlockBase * getEntry()
Definition VPlan.h:4518
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4542
friend class VPlan
Definition VPlan.h:4474
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4513
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3304
bool isSingleScalar() const
Definition VPlan.h:3352
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3312
~VPReplicateRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3364
bool isPredicated() const
Definition VPlan.h:3354
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3326
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3357
unsigned getOpcode() const
Definition VPlan.h:3376
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3371
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4179
VPValue * getStepValue() const
Definition VPlan.h:4152
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4146
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4119
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4165
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4131
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4160
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4156
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4112
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4173
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:610
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:616
static bool classof(const VPValue *V)
Definition VPlan.h:668
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:681
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:625
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:684
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, Value *UV=nullptr, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:620
static bool classof(const VPUser *U)
Definition VPlan.h:673
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:612
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:169
This class can be used to assign names to VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1557
operand_range operands()
Definition VPlanValue.h:455
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:428
unsigned getNumOperands() const
Definition VPlanValue.h:422
operand_iterator op_end()
Definition VPlanValue.h:453
operand_iterator op_begin()
Definition VPlanValue.h:451
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:423
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:403
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:449
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:448
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:208
unsigned getNumUsers() const
Definition VPlanValue.h:115
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2272
VPValue * getVFValue() const
Definition VPlan.h:2261
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2258
int64_t getStride() const
Definition VPlan.h:2259
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2293
VPValue * getOffset() const
Definition VPlan.h:2262
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2286
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2248
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2279
VPValue * getPointer() const
Definition VPlan.h:2260
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
VPValue * getStride() const
Definition VPlan.h:2327
Type * getSourceElementType() const
Definition VPlan.h:2335
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2337
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2344
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2318
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2361
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2351
VPValue * getVFxPart() const
Definition VPlan.h:2329
A recipe for widening Call instructions using library calls.
Definition VPlan.h:2067
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:2074
const_operand_range args() const
Definition VPlan.h:2115
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2093
operand_range args()
Definition VPlan.h:2114
Function * getCalledScalarFunction() const
Definition VPlan.h:2110
~VPWidenCallRecipe() override=default
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV, const VPIRFlags::WrapFlagsTy &Flags={false, false})
Definition VPlan.h:3990
~VPWidenCanonicalIVRecipe() override=default
VPValue * getStepValue() const
Definition VPlan.h:4023
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:4012
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:4019
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3997
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4007
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1848
Instruction::CastOps getOpcode() const
Definition VPlan.h:1884
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1853
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1869
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2210
Type * getSourceElementType() const
Definition VPlan.h:2215
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2218
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2202
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2187
void execute(VPTransformState &State) override=0
Generate the phi nodes.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2540
static bool classof(const VPValue *V)
Definition VPlan.h:2488
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2507
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2525
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2500
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2515
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2518
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2470
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2503
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2476
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2523
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2532
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2483
const VPValue * getVFValue() const
Definition VPlan.h:2510
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2493
const VPValue * getStepValue() const
Definition VPlan.h:2504
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2601
const TruncInst * getTruncInst() const
Definition VPlan.h:2617
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2595
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2605
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2587
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2561
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2616
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2570
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2627
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2612
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1895
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1945
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:1999
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:2005
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1931
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:2011
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1981
static bool classof(const VPValue *V)
Definition VPlan.h:1976
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1956
VPWidenIntrinsicRecipe(const unsigned char SC, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1909
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:2008
~VPWidenIntrinsicRecipe() override=default
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1966
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
static bool classof(const VPUser *U)
Definition VPlan.h:1971
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
~VPWidenMemIntrinsicRecipe() override=default
VPWidenMemIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2044
VPWidenMemIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2030
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
A common mixin class for widening memory operations.
Definition VPlan.h:3610
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3621
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3644
virtual ~VPWidenMemoryRecipe()=default
Instruction & Ingredient
Definition VPlan.h:3612
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition VPlan.h:3666
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3618
virtual const VPRecipeBase * getAsRecipe() const =0
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3654
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3615
VPWidenMemoryRecipe(Instruction &I, bool Consecutive, const VPIRMetadata &Metadata)
Definition VPlan.h:3631
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3650
void setMask(VPValue *Mask)
Definition VPlan.h:3623
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3661
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3647
A recipe for widened phis.
Definition VPlan.h:2685
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2721
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2699
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2692
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2654
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2663
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2644
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1790
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1810
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1839
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1794
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1802
~VPWidenRecipe() override=default
VPWidenRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:1812
unsigned getOpcode() const
Definition VPlan.h:1829
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4621
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4946
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1207
friend class VPSlotTracker
Definition VPlan.h:4623
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1183
bool hasVF(ElementCount VF) const
Definition VPlan.h:4844
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4857
const DataLayout & getDataLayout() const
Definition VPlan.h:4826
LLVMContext & getContext() const
Definition VPlan.h:4822
VPBasicBlock * getEntry()
Definition VPlan.h:4717
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:5034
void setName(const Twine &newName)
Definition VPlan.h:4890
bool hasScalableVF() const
Definition VPlan.h:4845
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4780
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4801
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4851
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:932
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:907
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:4908
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:940
const VPBasicBlock * getEntry() const
Definition VPlan.h:4718
friend class VPlanPrinter
Definition VPlan.h:4622
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4917
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:4940
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4820
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:4923
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:4995
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1342
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4949
bool hasUF(unsigned UF) const
Definition VPlan.h:4869
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4770
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4698
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4810
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4807
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4894
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:4983
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:4920
void setVF(ElementCount VF)
Definition VPlan.h:4832
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:4885
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1098
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:5017
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1080
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1113
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:4872
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:4934
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4755
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4787
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4794
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4746
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4706
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4972
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1348
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4839
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4914
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4722
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1213
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4817
bool hasScalarVFOnly() const
Definition VPlan.h:4862
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4760
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:950
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1166
void addVF(ElementCount VF)
Definition VPlan.h:4830
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4766
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1122
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4813
void setUF(unsigned UF)
Definition VPlan.h:4877
const VPSymbolicValue & getVF() const
Definition VPlan.h:4814
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:5027
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1254
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4928
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
CastInfo helper for casting from VPRecipeBase to a mixin class that is not part of the VPRecipeBase c...
Definition VPlan.h:4192
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1114
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2776
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
Type * toScalarizedTy(Type *Ty)
A helper for converting vectorized types to scalarized (non-vector) types.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:78
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:88
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI Type * getScalarTypeOrInfer(VPValue *V)
Return the scalar type of V.
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2774
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:73
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2768
Possible variants of a reduction.
Definition VPlan.h:2766
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2771
unsigned VFScaleFactor
Definition VPlan.h:2772
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:264
Struct to hold various analysis needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2737
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2749
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2728
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:728
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:733
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:723
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:716
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use cast/dyn_cast/isa and exec...
Definition VPlan.h:1758
PHINode & getIRPhi()
Definition VPlan.h:1771
VPIRPhi(PHINode &PN)
Definition VPlan.h:1759
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1761
static bool classof(const VPUser *U)
Definition VPlan.h:1766
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1782
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
static bool classof(const VPUser *U)
Definition VPlan.h:1658
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="")
Definition VPlan.h:1654
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1673
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1688
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1668
static bool classof(const VPValue *V)
Definition VPlan.h:1663
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1118
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1159
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1130
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1119
static bool classof(const VPValue *V)
Definition VPlan.h:1152
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1124
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1147
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3721
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3758
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
VPWidenLoadEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3731
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3757
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3738
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3722
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3748
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3672
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3673
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3698
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3708
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3681
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadRecipe.
Definition VPlan.h:3692
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3707
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3823
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3839
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3832
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3867
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3824
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3852
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3866
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3842
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3770
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3809
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3771
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3788
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3779
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3810
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreRecipe.
Definition VPlan.h:3794
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3800
static VPMixin * castFailed()
Definition VPlan.h:4210
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4201
static VPMixin * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4204