LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/Twine.h"
35#include "llvm/ADT/ilist.h"
36#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPBuilder;
64class VPRegionBlock;
65class VPlan;
66class VPReplicateRecipe;
67class VPlanSlp;
68class Value;
69class LoopVectorizationCostModel;
70class LoopVersioning;
71
72struct VPCostContext;
73
74namespace Intrinsic {
75typedef unsigned ID;
76}
77
78/// Returns a calculation for the total number of elements for a given \p VF.
79/// For fixed width vectors this value is a constant, whereas for scalable
80/// vectors it is an expression determined at runtime.
81Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
82
83/// Return a value for Step multiplied by VF.
84Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
85 int64_t Step);
86
87/// A helper function that returns the reciprocal of the block probability of
88/// predicated blocks. If we return X, we are assuming the predicated block
89/// will execute once for every X iterations of the loop header.
90///
91/// TODO: We should use actual block probability here, if available. Currently,
92/// we always assume predicated blocks have a 50% chance of executing.
93inline unsigned getReciprocalPredBlockProb() { return 2; }
94
95/// A range of powers-of-2 vectorization factors with fixed start and
96/// adjustable end. The range includes start and excludes end, e.g.,:
97/// [1, 16) = {1, 2, 4, 8}
98struct VFRange {
99 // A power of 2.
101
102 // A power of 2. If End <= Start range is empty.
104
105 bool isEmpty() const {
107 }
108
110 : Start(Start), End(End) {
112 "Both Start and End should have the same scalable flag");
114 "Expected Start to be a power of 2");
116 "Expected End to be a power of 2");
117 }
118
119 /// Iterator to iterate over vectorization factors in a VFRange.
121 : public iterator_facade_base<iterator, std::forward_iterator_tag,
122 ElementCount> {
123 ElementCount VF;
124
125 public:
126 iterator(ElementCount VF) : VF(VF) {}
127
128 bool operator==(const iterator &Other) const { return VF == Other.VF; }
129
130 ElementCount operator*() const { return VF; }
131
133 VF *= 2;
134 return *this;
135 }
136 };
137
141 return iterator(End);
142 }
143};
144
145using VPlanPtr = std::unique_ptr<VPlan>;
146
147/// In what follows, the term "input IR" refers to code that is fed into the
148/// vectorizer whereas the term "output IR" refers to code that is generated by
149/// the vectorizer.
150
151/// VPLane provides a way to access lanes in both fixed width and scalable
152/// vectors, where for the latter the lane index sometimes needs calculating
153/// as a runtime expression.
154class VPLane {
155public:
156 /// Kind describes how to interpret Lane.
157 enum class Kind : uint8_t {
158 /// For First, Lane is the index into the first N elements of a
159 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
160 First,
161 /// For ScalableLast, Lane is the offset from the start of the last
162 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
163 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
164 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
166 };
167
168private:
169 /// in [0..VF)
170 unsigned Lane;
171
172 /// Indicates how the Lane should be interpreted, as described above.
173 Kind LaneKind;
174
175public:
176 VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
177 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
178
180
181 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
182 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
183 "trying to extract with invalid offset");
184 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
185 Kind LaneKind;
186 if (VF.isScalable())
187 // In this case 'LaneOffset' refers to the offset from the start of the
188 // last subvector with VF.getKnownMinValue() elements.
190 else
191 LaneKind = VPLane::Kind::First;
192 return VPLane(LaneOffset, LaneKind);
193 }
194
196 return getLaneFromEnd(VF, 1);
197 }
198
199 /// Returns a compile-time known value for the lane index and asserts if the
200 /// lane can only be calculated at runtime.
201 unsigned getKnownLane() const {
202 assert(LaneKind == Kind::First);
203 return Lane;
204 }
205
206 /// Returns an expression describing the lane index that can be used at
207 /// runtime.
208 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
209
210 /// Returns the Kind of lane offset.
211 Kind getKind() const { return LaneKind; }
212
213 /// Returns true if this is the first lane of the whole vector.
214 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
215
216 /// Maps the lane to a cache index based on \p VF.
217 unsigned mapToCacheIndex(const ElementCount &VF) const {
218 switch (LaneKind) {
220 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
221 return VF.getKnownMinValue() + Lane;
222 default:
223 assert(Lane < VF.getKnownMinValue());
224 return Lane;
225 }
226 }
227};
228
229/// VPTransformState holds information passed down when "executing" a VPlan,
230/// needed for generating the output IR.
235 Loop *CurrentParentLoop, Type *CanonicalIVTy);
236 /// Target Transform Info.
238
239 /// The chosen Vectorization Factor of the loop being vectorized.
241
242 /// Hold the index to generate specific scalar instructions. Null indicates
243 /// that all instances are to be generated, using either scalar or vector
244 /// instructions.
245 std::optional<VPLane> Lane;
246
247 struct DataState {
248 // Each value from the original loop, when vectorized, is represented by a
249 // vector value in the map.
251
254
255 /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
256 /// is false, otherwise return the generated scalar. \See set.
257 Value *get(VPValue *Def, bool IsScalar = false);
258
259 /// Get the generated Value for a given VPValue and given Part and Lane.
260 Value *get(VPValue *Def, const VPLane &Lane);
261
262 bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
263
265 auto I = Data.VPV2Scalars.find(Def);
266 if (I == Data.VPV2Scalars.end())
267 return false;
268 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
269 return CacheIdx < I->second.size() && I->second[CacheIdx];
270 }
271
272 /// Set the generated vector Value for a given VPValue, if \p
273 /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
274 void set(VPValue *Def, Value *V, bool IsScalar = false) {
275 if (IsScalar) {
276 set(Def, V, VPLane(0));
277 return;
278 }
279 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
280 "scalar values must be stored as (0, 0)");
281 Data.VPV2Vector[Def] = V;
282 }
283
284 /// Reset an existing vector value for \p Def and a given \p Part.
285 void reset(VPValue *Def, Value *V) {
286 assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
287 Data.VPV2Vector[Def] = V;
288 }
289
290 /// Set the generated scalar \p V for \p Def and the given \p Lane.
291 void set(VPValue *Def, Value *V, const VPLane &Lane) {
292 auto &Scalars = Data.VPV2Scalars[Def];
293 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
294 if (Scalars.size() <= CacheIdx)
295 Scalars.resize(CacheIdx + 1);
296 assert(!Scalars[CacheIdx] && "should overwrite existing value");
297 Scalars[CacheIdx] = V;
298 }
299
300 /// Reset an existing scalar value for \p Def and a given \p Lane.
301 void reset(VPValue *Def, Value *V, const VPLane &Lane) {
302 auto Iter = Data.VPV2Scalars.find(Def);
303 assert(Iter != Data.VPV2Scalars.end() &&
304 "need to overwrite existing value");
305 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
306 assert(CacheIdx < Iter->second.size() &&
307 "need to overwrite existing value");
308 Iter->second[CacheIdx] = V;
309 }
310
311 /// Add additional metadata to \p To that was not present on \p Orig.
312 ///
313 /// Currently this is used to add the noalias annotations based on the
314 /// inserted memchecks. Use this for instructions that are *cloned* into the
315 /// vector loop.
316 void addNewMetadata(Instruction *To, const Instruction *Orig);
317
318 /// Add metadata from one instruction to another.
319 ///
320 /// This includes both the original MDs from \p From and additional ones (\see
321 /// addNewMetadata). Use this for *newly created* instructions in the vector
322 /// loop.
323 void addMetadata(Value *To, Instruction *From);
324
325 /// Set the debug location in the builder using the debug location \p DL.
327
328 /// Construct the vector value of a scalarized value \p V one lane at a time.
329 void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
330
331 /// Hold state information used when constructing the CFG of the output IR,
332 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
333 struct CFGState {
334 /// The previous VPBasicBlock visited. Initially set to null.
336
337 /// The previous IR BasicBlock created or used. Initially set to the new
338 /// header BasicBlock.
339 BasicBlock *PrevBB = nullptr;
340
341 /// The last IR BasicBlock in the output IR. Set to the exit block of the
342 /// vector loop.
343 BasicBlock *ExitBB = nullptr;
344
345 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
346 /// of replication, maps the BasicBlock of the last replica created.
348
349 /// Updater for the DominatorTree.
351
353 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
354
355 /// Returns the BasicBlock* mapped to the pre-header of the loop region
356 /// containing \p R.
359
360 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
362
363 /// Hold a reference to the IRBuilder used to generate output IR code.
365
366 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
368
369 /// Pointer to the VPlan code is generated for.
371
372 /// The parent loop object for the current scope, or nullptr.
374
375 /// LoopVersioning. It's only set up (non-null) if memchecks were
376 /// used.
377 ///
378 /// This is currently only used to add no-alias metadata based on the
379 /// memchecks. The actually versioning is performed manually.
381
382 /// Map SCEVs to their expanded values. Populated when executing
383 /// VPExpandSCEVRecipes.
385
386 /// VPlan-based type analysis.
388};
389
390/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
391/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
393 friend class VPBlockUtils;
394
395 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
396
397 /// An optional name for the block.
398 std::string Name;
399
400 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
401 /// it is a topmost VPBlockBase.
402 VPRegionBlock *Parent = nullptr;
403
404 /// List of predecessor blocks.
406
407 /// List of successor blocks.
409
410 /// VPlan containing the block. Can only be set on the entry block of the
411 /// plan.
412 VPlan *Plan = nullptr;
413
414 /// Add \p Successor as the last successor to this block.
415 void appendSuccessor(VPBlockBase *Successor) {
416 assert(Successor && "Cannot add nullptr successor!");
417 Successors.push_back(Successor);
418 }
419
420 /// Add \p Predecessor as the last predecessor to this block.
421 void appendPredecessor(VPBlockBase *Predecessor) {
422 assert(Predecessor && "Cannot add nullptr predecessor!");
423 Predecessors.push_back(Predecessor);
424 }
425
426 /// Remove \p Predecessor from the predecessors of this block.
427 void removePredecessor(VPBlockBase *Predecessor) {
428 auto Pos = find(Predecessors, Predecessor);
429 assert(Pos && "Predecessor does not exist");
430 Predecessors.erase(Pos);
431 }
432
433 /// Remove \p Successor from the successors of this block.
434 void removeSuccessor(VPBlockBase *Successor) {
435 auto Pos = find(Successors, Successor);
436 assert(Pos && "Successor does not exist");
437 Successors.erase(Pos);
438 }
439
440 /// This function replaces one predecessor with another, useful when
441 /// trying to replace an old block in the CFG with a new one.
442 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
443 auto I = find(Predecessors, Old);
444 assert(I != Predecessors.end());
445 assert(Old->getParent() == New->getParent() &&
446 "replaced predecessor must have the same parent");
447 *I = New;
448 }
449
450 /// This function replaces one successor with another, useful when
451 /// trying to replace an old block in the CFG with a new one.
452 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
453 auto I = find(Successors, Old);
454 assert(I != Successors.end());
455 assert(Old->getParent() == New->getParent() &&
456 "replaced successor must have the same parent");
457 *I = New;
458 }
459
460protected:
461 VPBlockBase(const unsigned char SC, const std::string &N)
462 : SubclassID(SC), Name(N) {}
463
464public:
465 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
466 /// that are actually instantiated. Values of this enumeration are kept in the
467 /// SubclassID field of the VPBlockBase objects. They are used for concrete
468 /// type identification.
469 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
470
472
473 virtual ~VPBlockBase() = default;
474
475 const std::string &getName() const { return Name; }
476
477 void setName(const Twine &newName) { Name = newName.str(); }
478
479 /// \return an ID for the concrete type of this object.
480 /// This is used to implement the classof checks. This should not be used
481 /// for any other purpose, as the values may change as LLVM evolves.
482 unsigned getVPBlockID() const { return SubclassID; }
483
484 VPRegionBlock *getParent() { return Parent; }
485 const VPRegionBlock *getParent() const { return Parent; }
486
487 /// \return A pointer to the plan containing the current block.
488 VPlan *getPlan();
489 const VPlan *getPlan() const;
490
491 /// Sets the pointer of the plan containing the block. The block must be the
492 /// entry block into the VPlan.
493 void setPlan(VPlan *ParentPlan);
494
495 void setParent(VPRegionBlock *P) { Parent = P; }
496
497 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
498 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
499 /// VPBlockBase is a VPBasicBlock, it is returned.
500 const VPBasicBlock *getEntryBasicBlock() const;
502
503 /// \return the VPBasicBlock that is the exiting this VPBlockBase,
504 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
505 /// VPBlockBase is a VPBasicBlock, it is returned.
506 const VPBasicBlock *getExitingBasicBlock() const;
508
509 const VPBlocksTy &getSuccessors() const { return Successors; }
510 VPBlocksTy &getSuccessors() { return Successors; }
511
514
515 const VPBlocksTy &getPredecessors() const { return Predecessors; }
516 VPBlocksTy &getPredecessors() { return Predecessors; }
517
518 /// \return the successor of this VPBlockBase if it has a single successor.
519 /// Otherwise return a null pointer.
521 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
522 }
523
524 /// \return the predecessor of this VPBlockBase if it has a single
525 /// predecessor. Otherwise return a null pointer.
527 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
528 }
529
530 size_t getNumSuccessors() const { return Successors.size(); }
531 size_t getNumPredecessors() const { return Predecessors.size(); }
532
533 /// An Enclosing Block of a block B is any block containing B, including B
534 /// itself. \return the closest enclosing block starting from "this", which
535 /// has successors. \return the root enclosing block if all enclosing blocks
536 /// have no successors.
538
539 /// \return the closest enclosing block starting from "this", which has
540 /// predecessors. \return the root enclosing block if all enclosing blocks
541 /// have no predecessors.
543
544 /// \return the successors either attached directly to this VPBlockBase or, if
545 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
546 /// successors of its own, search recursively for the first enclosing
547 /// VPRegionBlock that has successors and return them. If no such
548 /// VPRegionBlock exists, return the (empty) successors of the topmost
549 /// VPBlockBase reached.
552 }
553
554 /// \return the hierarchical successor of this VPBlockBase if it has a single
555 /// hierarchical successor. Otherwise return a null pointer.
558 }
559
560 /// \return the predecessors either attached directly to this VPBlockBase or,
561 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
562 /// predecessors of its own, search recursively for the first enclosing
563 /// VPRegionBlock that has predecessors and return them. If no such
564 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
565 /// VPBlockBase reached.
568 }
569
570 /// \return the hierarchical predecessor of this VPBlockBase if it has a
571 /// single hierarchical predecessor. Otherwise return a null pointer.
574 }
575
576 /// Set a given VPBlockBase \p Successor as the single successor of this
577 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
578 /// This VPBlockBase must have no successors.
580 assert(Successors.empty() && "Setting one successor when others exist.");
581 assert(Successor->getParent() == getParent() &&
582 "connected blocks must have the same parent");
583 appendSuccessor(Successor);
584 }
585
586 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
587 /// successors of this VPBlockBase. This VPBlockBase is not added as
588 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
589 /// successors.
590 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
591 assert(Successors.empty() && "Setting two successors when others exist.");
592 appendSuccessor(IfTrue);
593 appendSuccessor(IfFalse);
594 }
595
596 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
597 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
598 /// as successor of any VPBasicBlock in \p NewPreds.
600 assert(Predecessors.empty() && "Block predecessors already set.");
601 for (auto *Pred : NewPreds)
602 appendPredecessor(Pred);
603 }
604
605 /// Set each VPBasicBlock in \p NewSuccss as successor of this VPBlockBase.
606 /// This VPBlockBase must have no successors. This VPBlockBase is not added
607 /// as predecessor of any VPBasicBlock in \p NewSuccs.
609 assert(Successors.empty() && "Block successors already set.");
610 for (auto *Succ : NewSuccs)
611 appendSuccessor(Succ);
612 }
613
614 /// Remove all the predecessor of this block.
615 void clearPredecessors() { Predecessors.clear(); }
616
617 /// Remove all the successors of this block.
618 void clearSuccessors() { Successors.clear(); }
619
620 /// Swap successors of the block. The block must have exactly 2 successors.
621 // TODO: This should be part of introducing conditional branch recipes rather
622 // than being independent.
624 assert(Successors.size() == 2 && "must have 2 successors to swap");
625 std::swap(Successors[0], Successors[1]);
626 }
627
628 /// The method which generates the output IR that correspond to this
629 /// VPBlockBase, thereby "executing" the VPlan.
630 virtual void execute(VPTransformState *State) = 0;
631
632 /// Return the cost of the block.
634
635 /// Return true if it is legal to hoist instructions into this block.
637 // There are currently no constraints that prevent an instruction to be
638 // hoisted into a VPBlockBase.
639 return true;
640 }
641
642#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
643 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
644 OS << getName();
645 }
646
647 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
648 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
649 /// consequtive numbers.
650 ///
651 /// Note that the numbering is applied to the whole VPlan, so printing
652 /// individual blocks is consistent with the whole VPlan printing.
653 virtual void print(raw_ostream &O, const Twine &Indent,
654 VPSlotTracker &SlotTracker) const = 0;
655
656 /// Print plain-text dump of this VPlan to \p O.
657 void print(raw_ostream &O) const {
659 print(O, "", SlotTracker);
660 }
661
662 /// Print the successors of this block to \p O, prefixing all lines with \p
663 /// Indent.
664 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
665
666 /// Dump this VPBlockBase to dbgs().
667 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
668#endif
669
670 /// Clone the current block and it's recipes without updating the operands of
671 /// the cloned recipes, including all blocks in the single-entry single-exit
672 /// region for VPRegionBlocks.
673 virtual VPBlockBase *clone() = 0;
674};
675
676/// Struct to hold various analysis needed for cost computations.
685
689 : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
690 CM(CM), CostKind(CostKind) {}
691
692 /// Return the cost for \p UI with \p VF using the legacy cost model as
693 /// fallback until computing the cost of all recipes migrates to VPlan.
695
696 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
697 /// has already been pre-computed.
698 bool skipCostComputation(Instruction *UI, bool IsVector) const;
699
700 /// Returns the OperandInfo for \p V, if it is a live-in.
702};
703
704/// VPRecipeBase is a base class modeling a sequence of one or more output IR
705/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
706/// and is responsible for deleting its defined values. Single-value
707/// recipes must inherit from VPSingleDef instead of inheriting from both
708/// VPRecipeBase and VPValue separately.
709class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
710 public VPDef,
711 public VPUser {
712 friend VPBasicBlock;
713 friend class VPBlockUtils;
714
715 /// Each VPRecipe belongs to a single VPBasicBlock.
716 VPBasicBlock *Parent = nullptr;
717
718 /// The debug location for the recipe.
719 DebugLoc DL;
720
721public:
723 DebugLoc DL = {})
724 : VPDef(SC), VPUser(Operands), DL(DL) {}
725
726 template <typename IterT>
728 DebugLoc DL = {})
729 : VPDef(SC), VPUser(Operands), DL(DL) {}
730 virtual ~VPRecipeBase() = default;
731
732 /// Clone the current recipe.
733 virtual VPRecipeBase *clone() = 0;
734
735 /// \return the VPBasicBlock which this VPRecipe belongs to.
736 VPBasicBlock *getParent() { return Parent; }
737 const VPBasicBlock *getParent() const { return Parent; }
738
739 /// The method which generates the output IR instructions that correspond to
740 /// this VPRecipe, thereby "executing" the VPlan.
741 virtual void execute(VPTransformState &State) = 0;
742
743 /// Return the cost of this recipe, taking into account if the cost
744 /// computation should be skipped and the ForceTargetInstructionCost flag.
745 /// Also takes care of printing the cost for debugging.
747
748 /// Insert an unlinked recipe into a basic block immediately before
749 /// the specified recipe.
750 void insertBefore(VPRecipeBase *InsertPos);
751 /// Insert an unlinked recipe into \p BB immediately before the insertion
752 /// point \p IP;
754
755 /// Insert an unlinked Recipe into a basic block immediately after
756 /// the specified Recipe.
757 void insertAfter(VPRecipeBase *InsertPos);
758
759 /// Unlink this recipe from its current VPBasicBlock and insert it into
760 /// the VPBasicBlock that MovePos lives in, right after MovePos.
761 void moveAfter(VPRecipeBase *MovePos);
762
763 /// Unlink this recipe and insert into BB before I.
764 ///
765 /// \pre I is a valid iterator into BB.
767
768 /// This method unlinks 'this' from the containing basic block, but does not
769 /// delete it.
770 void removeFromParent();
771
772 /// This method unlinks 'this' from the containing basic block and deletes it.
773 ///
774 /// \returns an iterator pointing to the element after the erased one
776
777 /// Method to support type inquiry through isa, cast, and dyn_cast.
778 static inline bool classof(const VPDef *D) {
779 // All VPDefs are also VPRecipeBases.
780 return true;
781 }
782
783 static inline bool classof(const VPUser *U) { return true; }
784
785 /// Returns true if the recipe may have side-effects.
786 bool mayHaveSideEffects() const;
787
788 /// Returns true for PHI-like recipes.
789 bool isPhi() const {
790 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
791 }
792
793 /// Returns true if the recipe may read from memory.
794 bool mayReadFromMemory() const;
795
796 /// Returns true if the recipe may write to memory.
797 bool mayWriteToMemory() const;
798
799 /// Returns true if the recipe may read from or write to memory.
800 bool mayReadOrWriteMemory() const {
802 }
803
804 /// Returns the debug location of the recipe.
805 DebugLoc getDebugLoc() const { return DL; }
806
807protected:
808 /// Compute the cost of this recipe either using a recipe's specialized
809 /// implementation or using the legacy cost model and the underlying
810 /// instructions.
812 VPCostContext &Ctx) const;
813};
814
815// Helper macro to define common classof implementations for recipes.
816#define VP_CLASSOF_IMPL(VPDefID) \
817 static inline bool classof(const VPDef *D) { \
818 return D->getVPDefID() == VPDefID; \
819 } \
820 static inline bool classof(const VPValue *V) { \
821 auto *R = V->getDefiningRecipe(); \
822 return R && R->getVPDefID() == VPDefID; \
823 } \
824 static inline bool classof(const VPUser *U) { \
825 auto *R = dyn_cast<VPRecipeBase>(U); \
826 return R && R->getVPDefID() == VPDefID; \
827 } \
828 static inline bool classof(const VPRecipeBase *R) { \
829 return R->getVPDefID() == VPDefID; \
830 } \
831 static inline bool classof(const VPSingleDefRecipe *R) { \
832 return R->getVPDefID() == VPDefID; \
833 }
834
835/// VPSingleDef is a base class for recipes for modeling a sequence of one or
836/// more output IR that define a single result VPValue.
837/// Note that VPRecipeBase must be inherited from before VPValue.
838class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
839public:
840 template <typename IterT>
841 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
842 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
843
844 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
845 DebugLoc DL = {})
846 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
847
848 template <typename IterT>
849 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
850 DebugLoc DL = {})
851 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
852
853 static inline bool classof(const VPRecipeBase *R) {
854 switch (R->getVPDefID()) {
855 case VPRecipeBase::VPDerivedIVSC:
856 case VPRecipeBase::VPEVLBasedIVPHISC:
857 case VPRecipeBase::VPExpandSCEVSC:
858 case VPRecipeBase::VPInstructionSC:
859 case VPRecipeBase::VPReductionEVLSC:
860 case VPRecipeBase::VPReductionSC:
861 case VPRecipeBase::VPReplicateSC:
862 case VPRecipeBase::VPScalarIVStepsSC:
863 case VPRecipeBase::VPVectorPointerSC:
864 case VPRecipeBase::VPReverseVectorPointerSC:
865 case VPRecipeBase::VPWidenCallSC:
866 case VPRecipeBase::VPWidenCanonicalIVSC:
867 case VPRecipeBase::VPWidenCastSC:
868 case VPRecipeBase::VPWidenGEPSC:
869 case VPRecipeBase::VPWidenIntrinsicSC:
870 case VPRecipeBase::VPWidenSC:
871 case VPRecipeBase::VPWidenEVLSC:
872 case VPRecipeBase::VPWidenSelectSC:
873 case VPRecipeBase::VPBlendSC:
874 case VPRecipeBase::VPPredInstPHISC:
875 case VPRecipeBase::VPCanonicalIVPHISC:
876 case VPRecipeBase::VPActiveLaneMaskPHISC:
877 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
878 case VPRecipeBase::VPWidenPHISC:
879 case VPRecipeBase::VPWidenIntOrFpInductionSC:
880 case VPRecipeBase::VPWidenPointerInductionSC:
881 case VPRecipeBase::VPReductionPHISC:
882 case VPRecipeBase::VPScalarCastSC:
883 case VPRecipeBase::VPPartialReductionSC:
884 return true;
885 case VPRecipeBase::VPBranchOnMaskSC:
886 case VPRecipeBase::VPInterleaveSC:
887 case VPRecipeBase::VPIRInstructionSC:
888 case VPRecipeBase::VPWidenLoadEVLSC:
889 case VPRecipeBase::VPWidenLoadSC:
890 case VPRecipeBase::VPWidenStoreEVLSC:
891 case VPRecipeBase::VPWidenStoreSC:
892 case VPRecipeBase::VPHistogramSC:
893 // TODO: Widened stores don't define a value, but widened loads do. Split
894 // the recipes to be able to make widened loads VPSingleDefRecipes.
895 return false;
896 }
897 llvm_unreachable("Unhandled VPDefID");
898 }
899
900 static inline bool classof(const VPUser *U) {
901 auto *R = dyn_cast<VPRecipeBase>(U);
902 return R && classof(R);
903 }
904
905 virtual VPSingleDefRecipe *clone() override = 0;
906
907 /// Returns the underlying instruction.
909 return cast<Instruction>(getUnderlyingValue());
910 }
912 return cast<Instruction>(getUnderlyingValue());
913 }
914
915#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
916 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
917 LLVM_DUMP_METHOD void dump() const;
918#endif
919};
920
921/// Class to record LLVM IR flag for a recipe along with it.
923 enum class OperationType : unsigned char {
924 Cmp,
925 OverflowingBinOp,
926 DisjointOp,
927 PossiblyExactOp,
928 GEPOp,
929 FPMathOp,
930 NonNegOp,
931 Other
932 };
933
934public:
935 struct WrapFlagsTy {
936 char HasNUW : 1;
937 char HasNSW : 1;
938
940 };
941
943 char IsDisjoint : 1;
945 };
946
947private:
948 struct ExactFlagsTy {
949 char IsExact : 1;
950 };
951 struct NonNegFlagsTy {
952 char NonNeg : 1;
953 };
954 struct FastMathFlagsTy {
955 char AllowReassoc : 1;
956 char NoNaNs : 1;
957 char NoInfs : 1;
958 char NoSignedZeros : 1;
959 char AllowReciprocal : 1;
960 char AllowContract : 1;
961 char ApproxFunc : 1;
962
963 FastMathFlagsTy(const FastMathFlags &FMF);
964 };
965
966 OperationType OpType;
967
968 union {
972 ExactFlagsTy ExactFlags;
974 NonNegFlagsTy NonNegFlags;
975 FastMathFlagsTy FMFs;
976 unsigned AllFlags;
977 };
978
979protected:
981 OpType = Other.OpType;
982 AllFlags = Other.AllFlags;
983 }
984
985public:
986 template <typename IterT>
987 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
988 : VPSingleDefRecipe(SC, Operands, DL) {
989 OpType = OperationType::Other;
990 AllFlags = 0;
991 }
992
993 template <typename IterT>
994 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
996 if (auto *Op = dyn_cast<CmpInst>(&I)) {
997 OpType = OperationType::Cmp;
998 CmpPredicate = Op->getPredicate();
999 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1000 OpType = OperationType::DisjointOp;
1001 DisjointFlags.IsDisjoint = Op->isDisjoint();
1002 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1003 OpType = OperationType::OverflowingBinOp;
1004 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1005 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1006 OpType = OperationType::PossiblyExactOp;
1007 ExactFlags.IsExact = Op->isExact();
1008 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1009 OpType = OperationType::GEPOp;
1010 GEPFlags = GEP->getNoWrapFlags();
1011 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1012 OpType = OperationType::NonNegOp;
1013 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1014 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1015 OpType = OperationType::FPMathOp;
1016 FMFs = Op->getFastMathFlags();
1017 } else {
1018 OpType = OperationType::Other;
1019 AllFlags = 0;
1020 }
1021 }
1022
1023 template <typename IterT>
1024 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1025 CmpInst::Predicate Pred, DebugLoc DL = {})
1026 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1027 CmpPredicate(Pred) {}
1028
1029 template <typename IterT>
1030 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1032 : VPSingleDefRecipe(SC, Operands, DL),
1033 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1034
1035 template <typename IterT>
1036 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1037 FastMathFlags FMFs, DebugLoc DL = {})
1038 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1039 FMFs(FMFs) {}
1040
1041 template <typename IterT>
1042 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1044 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1046
1047protected:
1048 template <typename IterT>
1049 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1051 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1052 GEPFlags(GEPFlags) {}
1053
1054public:
1055 static inline bool classof(const VPRecipeBase *R) {
1056 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1057 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1058 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
1059 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1060 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1061 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1062 R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
1063 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1064 }
1065
1066 static inline bool classof(const VPUser *U) {
1067 auto *R = dyn_cast<VPRecipeBase>(U);
1068 return R && classof(R);
1069 }
1070
1071 /// Drop all poison-generating flags.
1073 // NOTE: This needs to be kept in-sync with
1074 // Instruction::dropPoisonGeneratingFlags.
1075 switch (OpType) {
1076 case OperationType::OverflowingBinOp:
1077 WrapFlags.HasNUW = false;
1078 WrapFlags.HasNSW = false;
1079 break;
1080 case OperationType::DisjointOp:
1081 DisjointFlags.IsDisjoint = false;
1082 break;
1083 case OperationType::PossiblyExactOp:
1084 ExactFlags.IsExact = false;
1085 break;
1086 case OperationType::GEPOp:
1088 break;
1089 case OperationType::FPMathOp:
1090 FMFs.NoNaNs = false;
1091 FMFs.NoInfs = false;
1092 break;
1093 case OperationType::NonNegOp:
1094 NonNegFlags.NonNeg = false;
1095 break;
1096 case OperationType::Cmp:
1097 case OperationType::Other:
1098 break;
1099 }
1100 }
1101
1102 /// Set the IR flags for \p I.
1103 void setFlags(Instruction *I) const {
1104 switch (OpType) {
1105 case OperationType::OverflowingBinOp:
1106 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1107 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1108 break;
1109 case OperationType::DisjointOp:
1110 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1111 break;
1112 case OperationType::PossiblyExactOp:
1113 I->setIsExact(ExactFlags.IsExact);
1114 break;
1115 case OperationType::GEPOp:
1116 cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
1117 break;
1118 case OperationType::FPMathOp:
1119 I->setHasAllowReassoc(FMFs.AllowReassoc);
1120 I->setHasNoNaNs(FMFs.NoNaNs);
1121 I->setHasNoInfs(FMFs.NoInfs);
1122 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1123 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1124 I->setHasAllowContract(FMFs.AllowContract);
1125 I->setHasApproxFunc(FMFs.ApproxFunc);
1126 break;
1127 case OperationType::NonNegOp:
1128 I->setNonNeg(NonNegFlags.NonNeg);
1129 break;
1130 case OperationType::Cmp:
1131 case OperationType::Other:
1132 break;
1133 }
1134 }
1135
1137 assert(OpType == OperationType::Cmp &&
1138 "recipe doesn't have a compare predicate");
1139 return CmpPredicate;
1140 }
1141
1143
1144 /// Returns true if the recipe has fast-math flags.
1145 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1146
1148
1149 bool hasNoUnsignedWrap() const {
1150 assert(OpType == OperationType::OverflowingBinOp &&
1151 "recipe doesn't have a NUW flag");
1152 return WrapFlags.HasNUW;
1153 }
1154
1155 bool hasNoSignedWrap() const {
1156 assert(OpType == OperationType::OverflowingBinOp &&
1157 "recipe doesn't have a NSW flag");
1158 return WrapFlags.HasNSW;
1159 }
1160
1161 bool isDisjoint() const {
1162 assert(OpType == OperationType::DisjointOp &&
1163 "recipe cannot have a disjoing flag");
1165 }
1166
1167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1168 void printFlags(raw_ostream &O) const;
1169#endif
1170};
1171
1172/// Helper to access the operand that contains the unroll part for this recipe
1173/// after unrolling.
1174template <unsigned PartOpIdx> class VPUnrollPartAccessor {
1175protected:
1176 /// Return the VPValue operand containing the unroll part or null if there is
1177 /// no such operand.
1179
1180 /// Return the unroll part.
1181 unsigned getUnrollPart(VPUser &U) const;
1182};
1183
1184/// This is a concrete Recipe that models a single VPlan-level instruction.
1185/// While as any Recipe it may generate a sequence of IR instructions when
1186/// executed, these instructions would always form a single-def expression as
1187/// the VPInstruction is also a single def-use vertex.
1189 public VPUnrollPartAccessor<1> {
1190 friend class VPlanSlp;
1191
1192public:
1193 /// VPlan opcodes, extending LLVM IR with idiomatics instructions.
1194 enum {
1196 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1197 // values of a first-order recurrence.
1203 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1204 /// The first operand is the incoming value from the predecessor in VPlan,
1205 /// the second operand is the incoming value for all other predecessors
1206 /// (which are currently not modeled in VPlan).
1209 // Increment the canonical IV separately for each unrolled part.
1214 // Takes the VPValue to extract from as first operand and the lane or part
1215 // to extract as second operand, counting from the end starting with 1 for
1216 // last. The second operand must be a positive constant and <= VF.
1218 LogicalAnd, // Non-poison propagating logical And.
1219 // Add an offset in bytes (second operand) to a base pointer (first
1220 // operand). Only generates scalar values (either for the first lane only or
1221 // for all lanes, depending on its uses).
1223 // Returns a scalar boolean value, which is true if any lane of its (only
1224 // boolean) vector operand is true.
1226 };
1227
1228private:
1229 typedef unsigned char OpcodeTy;
1230 OpcodeTy Opcode;
1231
1232 /// An optional name that can be used for the generated IR instruction.
1233 const std::string Name;
1234
1235 /// Returns true if this VPInstruction generates scalar values for all lanes.
1236 /// Most VPInstructions generate a single value per part, either vector or
1237 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1238 /// values per all lanes, stemming from an original ingredient. This method
1239 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1240 /// underlying ingredient.
1241 bool doesGeneratePerAllLanes() const;
1242
1243 /// Returns true if we can generate a scalar for the first lane only if
1244 /// needed.
1245 bool canGenerateScalarForFirstLane() const;
1246
1247 /// Utility methods serving execute(): generates a single vector instance of
1248 /// the modeled instruction. \returns the generated value. . In some cases an
1249 /// existing value is returned rather than a generated one.
1250 Value *generate(VPTransformState &State);
1251
1252 /// Utility methods serving execute(): generates a scalar single instance of
1253 /// the modeled instruction for a given lane. \returns the scalar generated
1254 /// value for lane \p Lane.
1255 Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
1256
1257#if !defined(NDEBUG)
1258 /// Return true if the VPInstruction is a floating point math operation, i.e.
1259 /// has fast-math flags.
1260 bool isFPMathOp() const;
1261#endif
1262
1263public:
1265 const Twine &Name = "")
1266 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1267 Opcode(Opcode), Name(Name.str()) {}
1268
1269 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1270 DebugLoc DL = {}, const Twine &Name = "")
1272
1273 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1274 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1275
1276 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1277 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1278 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1279 Opcode(Opcode), Name(Name.str()) {}
1280
1281 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1282 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1283 const Twine &Name = "")
1284 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1285 Opcode(Opcode), Name(Name.str()) {
1286 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1287 }
1288
1290 DebugLoc DL = {}, const Twine &Name = "")
1291 : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
1292 ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
1293 Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
1294
1295 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1296 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1297
1298 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1299
1300 VPInstruction *clone() override {
1302 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1303 New->transferFlags(*this);
1304 return New;
1305 }
1306
1307 unsigned getOpcode() const { return Opcode; }
1308
1309 /// Generate the instruction.
1310 /// TODO: We currently execute only per-part unless a specific instance is
1311 /// provided.
1312 void execute(VPTransformState &State) override;
1313
1314 /// Return the cost of this VPInstruction.
1316 VPCostContext &Ctx) const override {
1317 // TODO: Compute accurate cost after retiring the legacy cost model.
1318 return 0;
1319 }
1320
1321#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1322 /// Print the VPInstruction to \p O.
1323 void print(raw_ostream &O, const Twine &Indent,
1324 VPSlotTracker &SlotTracker) const override;
1325
1326 /// Print the VPInstruction to dbgs() (for debugging).
1327 LLVM_DUMP_METHOD void dump() const;
1328#endif
1329
1330 bool hasResult() const {
1331 // CallInst may or may not have a result, depending on the called function.
1332 // Conservatively return calls have results for now.
1333 switch (getOpcode()) {
1334 case Instruction::Ret:
1335 case Instruction::Br:
1336 case Instruction::Store:
1337 case Instruction::Switch:
1338 case Instruction::IndirectBr:
1339 case Instruction::Resume:
1340 case Instruction::CatchRet:
1341 case Instruction::Unreachable:
1342 case Instruction::Fence:
1343 case Instruction::AtomicRMW:
1346 return false;
1347 default:
1348 return true;
1349 }
1350 }
1351
1352 /// Returns true if the underlying opcode may read from or write to memory.
1353 bool opcodeMayReadOrWriteFromMemory() const;
1354
1355 /// Returns true if the recipe only uses the first lane of operand \p Op.
1356 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1357
1358 /// Returns true if the recipe only uses the first part of operand \p Op.
1359 bool onlyFirstPartUsed(const VPValue *Op) const override;
1360
1361 /// Returns true if this VPInstruction produces a scalar value from a vector,
1362 /// e.g. by performing a reduction or extracting a lane.
1363 bool isVectorToScalar() const;
1364
1365 /// Returns true if this VPInstruction's operands are single scalars and the
1366 /// result is also a single scalar.
1367 bool isSingleScalar() const;
1368
1369 /// Returns the symbolic name assigned to the VPInstruction.
1370 StringRef getName() const { return Name; }
1371};
1372
1373/// A recipe to wrap on original IR instruction not to be modified during
1374/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed,
1375/// and it is used to add a new incoming value for the single predecessor VPBB.
1376/// Expect PHIs, VPIRInstructions cannot have any operands.
1378 Instruction &I;
1379
1380public:
1382 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1383
1384 ~VPIRInstruction() override = default;
1385
1386 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1387
1389 auto *R = new VPIRInstruction(I);
1390 for (auto *Op : operands())
1391 R->addOperand(Op);
1392 return R;
1393 }
1394
1395 void execute(VPTransformState &State) override;
1396
1397 /// Return the cost of this VPIRInstruction.
1399 VPCostContext &Ctx) const override;
1400
1401 Instruction &getInstruction() const { return I; }
1402
1403#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1404 /// Print the recipe.
1405 void print(raw_ostream &O, const Twine &Indent,
1406 VPSlotTracker &SlotTracker) const override;
1407#endif
1408
1409 bool usesScalars(const VPValue *Op) const override {
1411 "Op must be an operand of the recipe");
1412 return true;
1413 }
1414
1415 bool onlyFirstPartUsed(const VPValue *Op) const override {
1417 "Op must be an operand of the recipe");
1418 return true;
1419 }
1420
1421 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1423 "Op must be an operand of the recipe");
1424 return true;
1425 }
1426
1427 /// Update the recipes single operand to the last lane of the operand using \p
1428 /// Builder. Must only be used for single operand VPIRInstructions wrapping a
1429 /// PHINode.
1430 void extractLastLaneOfOperand(VPBuilder &Builder);
1431};
1432
1433/// VPWidenRecipe is a recipe for producing a widened instruction using the
1434/// opcode and operands of the recipe. This recipe covers most of the
1435/// traditional vectorization cases where each recipe transforms into a
1436/// vectorized version of itself.
1438 unsigned Opcode;
1439
1440protected:
1441 template <typename IterT>
1442 VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1444 : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1445
1446public:
1447 template <typename IterT>
1449 : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1450
1451 ~VPWidenRecipe() override = default;
1452
1453 VPWidenRecipe *clone() override {
1454 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1455 R->transferFlags(*this);
1456 return R;
1457 }
1458
1459 static inline bool classof(const VPRecipeBase *R) {
1460 return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1461 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1462 }
1463
1464 static inline bool classof(const VPUser *U) {
1465 auto *R = dyn_cast<VPRecipeBase>(U);
1466 return R && classof(R);
1467 }
1468
1469 /// Produce a widened instruction using the opcode and operands of the recipe,
1470 /// processing State.VF elements.
1471 void execute(VPTransformState &State) override;
1472
1473 /// Return the cost of this VPWidenRecipe.
1475 VPCostContext &Ctx) const override;
1476
1477 unsigned getOpcode() const { return Opcode; }
1478
1479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1480 /// Print the recipe.
1481 void print(raw_ostream &O, const Twine &Indent,
1482 VPSlotTracker &SlotTracker) const override;
1483#endif
1484};
1485
1486/// A recipe for widening operations with vector-predication intrinsics with
1487/// explicit vector length (EVL).
1490
1491public:
1492 template <typename IterT>
1494 : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1495 addOperand(&EVL);
1496 }
1498 : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1499 transferFlags(W);
1500 }
1501
1502 ~VPWidenEVLRecipe() override = default;
1503
1504 VPWidenRecipe *clone() override final {
1505 llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1506 return nullptr;
1507 }
1508
1509 VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1510
1512 const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1513
1514 /// Produce a vp-intrinsic using the opcode and operands of the recipe,
1515 /// processing EVL elements.
1516 void execute(VPTransformState &State) override final;
1517
1518 /// Returns true if the recipe only uses the first lane of operand \p Op.
1519 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1521 "Op must be an operand of the recipe");
1522 // EVL in that recipe is always the last operand, thus any use before means
1523 // the VPValue should be vectorized.
1524 return getEVL() == Op;
1525 }
1526
1527#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1528 /// Print the recipe.
1529 void print(raw_ostream &O, const Twine &Indent,
1530 VPSlotTracker &SlotTracker) const override final;
1531#endif
1532};
1533
1534/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1536 /// Cast instruction opcode.
1537 Instruction::CastOps Opcode;
1538
1539 /// Result type for the cast.
1540 Type *ResultTy;
1541
1542public:
1544 CastInst &UI)
1545 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1546 ResultTy(ResultTy) {
1547 assert(UI.getOpcode() == Opcode &&
1548 "opcode of underlying cast doesn't match");
1549 }
1550
1552 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1553 ResultTy(ResultTy) {}
1554
1555 ~VPWidenCastRecipe() override = default;
1556
1558 if (auto *UV = getUnderlyingValue())
1559 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1560 *cast<CastInst>(UV));
1561
1562 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1563 }
1564
1565 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1566
1567 /// Produce widened copies of the cast.
1568 void execute(VPTransformState &State) override;
1569
1570 /// Return the cost of this VPWidenCastRecipe.
1572 VPCostContext &Ctx) const override;
1573
1574#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1575 /// Print the recipe.
1576 void print(raw_ostream &O, const Twine &Indent,
1577 VPSlotTracker &SlotTracker) const override;
1578#endif
1579
1580 Instruction::CastOps getOpcode() const { return Opcode; }
1581
1582 /// Returns the result type of the cast.
1583 Type *getResultType() const { return ResultTy; }
1584};
1585
1586/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1588 Instruction::CastOps Opcode;
1589
1590 Type *ResultTy;
1591
1592 Value *generate(VPTransformState &State);
1593
1594public:
1596 DebugLoc DL)
1597 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
1598 ResultTy(ResultTy) {}
1599
1600 ~VPScalarCastRecipe() override = default;
1601
1603 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
1604 getDebugLoc());
1605 }
1606
1607 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1608
1609 void execute(VPTransformState &State) override;
1610
1611 /// Return the cost of this VPScalarCastRecipe.
1613 VPCostContext &Ctx) const override {
1614 // TODO: Compute accurate cost after retiring the legacy cost model.
1615 return 0;
1616 }
1617
1618#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1619 void print(raw_ostream &O, const Twine &Indent,
1620 VPSlotTracker &SlotTracker) const override;
1621#endif
1622
1623 /// Returns the result type of the cast.
1624 Type *getResultType() const { return ResultTy; }
1625
1626 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1627 // At the moment, only uniform codegen is implemented.
1629 "Op must be an operand of the recipe");
1630 return true;
1631 }
1632};
1633
1634/// A recipe for widening vector intrinsics.
1636 /// ID of the vector intrinsic to widen.
1637 Intrinsic::ID VectorIntrinsicID;
1638
1639 /// Scalar return type of the intrinsic.
1640 Type *ResultTy;
1641
1642 /// True if the intrinsic may read from memory.
1643 bool MayReadFromMemory;
1644
1645 /// True if the intrinsic may read write to memory.
1646 bool MayWriteToMemory;
1647
1648 /// True if the intrinsic may have side-effects.
1649 bool MayHaveSideEffects;
1650
1651public:
1653 ArrayRef<VPValue *> CallArguments, Type *Ty,
1654 DebugLoc DL = {})
1655 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1656 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1657 MayReadFromMemory(CI.mayReadFromMemory()),
1658 MayWriteToMemory(CI.mayWriteToMemory()),
1659 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1660
1662 ArrayRef<VPValue *> CallArguments, Type *Ty,
1663 DebugLoc DL = {})
1664 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1665 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1666 LLVMContext &Ctx = Ty->getContext();
1667 AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
1668 MemoryEffects ME = Attrs.getMemoryEffects();
1669 MayReadFromMemory = ME.onlyWritesMemory();
1670 MayWriteToMemory = ME.onlyReadsMemory();
1671 MayHaveSideEffects = MayWriteToMemory ||
1672 !Attrs.hasFnAttr(Attribute::NoUnwind) ||
1673 !Attrs.hasFnAttr(Attribute::WillReturn);
1674 }
1675
1677 std::initializer_list<VPValue *> CallArguments,
1678 Type *Ty, DebugLoc DL = {})
1679 : VPWidenIntrinsicRecipe(VectorIntrinsicID,
1680 ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
1681
1682 ~VPWidenIntrinsicRecipe() override = default;
1683
1685 return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
1686 VectorIntrinsicID, {op_begin(), op_end()},
1687 ResultTy, getDebugLoc());
1688 }
1689
1690 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1691
1692 /// Produce a widened version of the vector intrinsic.
1693 void execute(VPTransformState &State) override;
1694
1695 /// Return the cost of this vector intrinsic.
1697 VPCostContext &Ctx) const override;
1698
1699 /// Return the ID of the intrinsic.
1700 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1701
1702 /// Return the scalar return type of the intrinsic.
1703 Type *getResultType() const { return ResultTy; }
1704
1705 /// Return to name of the intrinsic as string.
1707
1708 /// Returns true if the intrinsic may read from memory.
1709 bool mayReadFromMemory() const { return MayReadFromMemory; }
1710
1711 /// Returns true if the intrinsic may write to memory.
1712 bool mayWriteToMemory() const { return MayWriteToMemory; }
1713
1714 /// Returns true if the intrinsic may have side-effects.
1715 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1716
1717#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1718 /// Print the recipe.
1719 void print(raw_ostream &O, const Twine &Indent,
1720 VPSlotTracker &SlotTracker) const override;
1721#endif
1722
1723 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1724};
1725
1726/// A recipe for widening Call instructions using library calls.
1728 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1729 /// between a given VF and the chosen vectorized variant, so there will be a
1730 /// different VPlan for each VF with a valid variant.
1731 Function *Variant;
1732
1733public:
1735 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1736 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1737 *cast<Instruction>(UV)),
1738 Variant(Variant) {
1739 assert(
1740 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1741 "last operand must be the called function");
1742 }
1743
1744 ~VPWidenCallRecipe() override = default;
1745
1747 return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1748 {op_begin(), op_end()}, getDebugLoc());
1749 }
1750
1751 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1752
1753 /// Produce a widened version of the call instruction.
1754 void execute(VPTransformState &State) override;
1755
1756 /// Return the cost of this VPWidenCallRecipe.
1758 VPCostContext &Ctx) const override;
1759
1761 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1762 }
1763
1765 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1766 }
1768 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1769 }
1770
1771#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1772 /// Print the recipe.
1773 void print(raw_ostream &O, const Twine &Indent,
1774 VPSlotTracker &SlotTracker) const override;
1775#endif
1776};
1777
1778/// A recipe representing a sequence of load -> update -> store as part of
1779/// a histogram operation. This means there may be aliasing between vector
1780/// lanes, which is handled by the llvm.experimental.vector.histogram family
1781/// of intrinsics. The only update operations currently supported are
1782/// 'add' and 'sub' where the other term is loop-invariant.
1784 /// Opcode of the update operation, currently either add or sub.
1785 unsigned Opcode;
1786
1787public:
1788 template <typename IterT>
1789 VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
1790 DebugLoc DL = {})
1791 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1792
1793 ~VPHistogramRecipe() override = default;
1794
1796 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1797 }
1798
1799 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1800
1801 /// Produce a vectorized histogram operation.
1802 void execute(VPTransformState &State) override;
1803
1804 /// Return the cost of this VPHistogramRecipe.
1806 VPCostContext &Ctx) const override;
1807
1808 unsigned getOpcode() const { return Opcode; }
1809
1810 /// Return the mask operand if one was provided, or a null pointer if all
1811 /// lanes should be executed unconditionally.
1812 VPValue *getMask() const {
1813 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1814 }
1815
1816#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1817 /// Print the recipe
1818 void print(raw_ostream &O, const Twine &Indent,
1819 VPSlotTracker &SlotTracker) const override;
1820#endif
1821};
1822
1823/// A recipe for widening select instructions.
1825 template <typename IterT>
1827 : VPRecipeWithIRFlags(VPDef::VPWidenSelectSC, Operands, I) {}
1828
1829 ~VPWidenSelectRecipe() override = default;
1830
1832 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1833 operands());
1834 }
1835
1836 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1837
1838 /// Produce a widened version of the select instruction.
1839 void execute(VPTransformState &State) override;
1840
1841 /// Return the cost of this VPWidenSelectRecipe.
1843 VPCostContext &Ctx) const override;
1844
1845#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1846 /// Print the recipe.
1847 void print(raw_ostream &O, const Twine &Indent,
1848 VPSlotTracker &SlotTracker) const override;
1849#endif
1850
1851 VPValue *getCond() const {
1852 return getOperand(0);
1853 }
1854
1855 bool isInvariantCond() const {
1857 }
1858};
1859
1860/// A recipe for handling GEP instructions.
1862 bool isPointerLoopInvariant() const {
1864 }
1865
1866 bool isIndexLoopInvariant(unsigned I) const {
1868 }
1869
1870 bool areAllOperandsInvariant() const {
1871 return all_of(operands(), [](VPValue *Op) {
1872 return Op->isDefinedOutsideLoopRegions();
1873 });
1874 }
1875
1876public:
1877 template <typename IterT>
1879 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1880
1881 ~VPWidenGEPRecipe() override = default;
1882
1884 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1885 operands());
1886 }
1887
1888 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1889
1890 /// Generate the gep nodes.
1891 void execute(VPTransformState &State) override;
1892
1893 /// Return the cost of this VPWidenGEPRecipe.
1895 VPCostContext &Ctx) const override {
1896 // TODO: Compute accurate cost after retiring the legacy cost model.
1897 return 0;
1898 }
1899
1900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1901 /// Print the recipe.
1902 void print(raw_ostream &O, const Twine &Indent,
1903 VPSlotTracker &SlotTracker) const override;
1904#endif
1905};
1906
1907/// A recipe to compute the pointers for widened memory accesses of IndexTy
1908/// in reverse order.
1910 public VPUnrollPartAccessor<2> {
1911 Type *IndexedTy;
1912
1913public:
1916 : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1917 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1918 IndexedTy(IndexedTy) {}
1919
1920 VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1921
1923 const VPValue *getVFValue() const { return getOperand(1); }
1924
1925 void execute(VPTransformState &State) override;
1926
1927 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1929 "Op must be an operand of the recipe");
1930 return true;
1931 }
1932
1933 /// Return the cost of this VPVectorPointerRecipe.
1935 VPCostContext &Ctx) const override {
1936 // TODO: Compute accurate cost after retiring the legacy cost model.
1937 return 0;
1938 }
1939
1940 /// Returns true if the recipe only uses the first part of operand \p Op.
1941 bool onlyFirstPartUsed(const VPValue *Op) const override {
1943 "Op must be an operand of the recipe");
1944 assert(getNumOperands() <= 2 && "must have at most two operands");
1945 return true;
1946 }
1947
1950 IndexedTy, getGEPNoWrapFlags(),
1951 getDebugLoc());
1952 }
1953
1954#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1955 /// Print the recipe.
1956 void print(raw_ostream &O, const Twine &Indent,
1957 VPSlotTracker &SlotTracker) const override;
1958#endif
1959};
1960
1961/// A recipe to compute the pointers for widened memory accesses of IndexTy.
1963 public VPUnrollPartAccessor<1> {
1964 Type *IndexedTy;
1965
1966public:
1968 DebugLoc DL)
1969 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1970 GEPFlags, DL),
1971 IndexedTy(IndexedTy) {}
1972
1973 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1974
1975 void execute(VPTransformState &State) override;
1976
1977 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1979 "Op must be an operand of the recipe");
1980 return true;
1981 }
1982
1983 /// Returns true if the recipe only uses the first part of operand \p Op.
1984 bool onlyFirstPartUsed(const VPValue *Op) const override {
1986 "Op must be an operand of the recipe");
1987 assert(getNumOperands() <= 2 && "must have at most two operands");
1988 return true;
1989 }
1990
1992 return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
1994 }
1995
1996 /// Return the cost of this VPHeaderPHIRecipe.
1998 VPCostContext &Ctx) const override {
1999 // TODO: Compute accurate cost after retiring the legacy cost model.
2000 return 0;
2001 }
2002
2003#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2004 /// Print the recipe.
2005 void print(raw_ostream &O, const Twine &Indent,
2006 VPSlotTracker &SlotTracker) const override;
2007#endif
2008};
2009
2010/// A pure virtual base class for all recipes modeling header phis, including
2011/// phis for first order recurrences, pointer inductions and reductions. The
2012/// start value is the first operand of the recipe and the incoming value from
2013/// the backedge is the second operand.
2014///
2015/// Inductions are modeled using the following sub-classes:
2016/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2017/// starting at a specified value (zero for the main vector loop, the resume
2018/// value for the epilogue vector loop) and stepping by 1. The induction
2019/// controls exiting of the vector loop by comparing against the vector trip
2020/// count. Produces a single scalar PHI for the induction value per
2021/// iteration.
2022/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2023/// floating point inductions with arbitrary start and step values. Produces
2024/// a vector PHI per-part.
2025/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2026/// value of an IV with different start and step values. Produces a single
2027/// scalar value per iteration
2028/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2029/// canonical or derived induction.
2030/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2031/// pointer induction. Produces either a vector PHI per-part or scalar values
2032/// per-lane based on the canonical induction.
2034protected:
2035 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2036 VPValue *Start = nullptr, DebugLoc DL = {})
2037 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
2038 if (Start)
2039 addOperand(Start);
2040 }
2041
2042public:
2043 ~VPHeaderPHIRecipe() override = default;
2044
2045 /// Method to support type inquiry through isa, cast, and dyn_cast.
2046 static inline bool classof(const VPRecipeBase *B) {
2047 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2048 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2049 }
2050 static inline bool classof(const VPValue *V) {
2051 auto *B = V->getDefiningRecipe();
2052 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2053 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2054 }
2055
2056 /// Generate the phi nodes.
2057 void execute(VPTransformState &State) override = 0;
2058
2059 /// Return the cost of this header phi recipe.
2061 VPCostContext &Ctx) const override;
2062
2063#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2064 /// Print the recipe.
2065 void print(raw_ostream &O, const Twine &Indent,
2066 VPSlotTracker &SlotTracker) const override = 0;
2067#endif
2068
2069 /// Returns the start value of the phi, if one is set.
2071 return getNumOperands() == 0 ? nullptr : getOperand(0);
2072 }
2074 return getNumOperands() == 0 ? nullptr : getOperand(0);
2075 }
2076
2077 /// Update the start value of the recipe.
2079
2080 /// Returns the incoming value from the loop backedge.
2082 return getOperand(1);
2083 }
2084
2085 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2086 /// to be a recipe.
2089 }
2090};
2091
2092/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2093/// VPWidenPointerInductionRecipe), providing shared functionality, including
2094/// retrieving the step value, induction descriptor and original phi node.
2096 const InductionDescriptor &IndDesc;
2097
2098public:
2099 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2100 VPValue *Step, const InductionDescriptor &IndDesc,
2101 DebugLoc DL)
2102 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2103 addOperand(Step);
2104 }
2105
2106 static inline bool classof(const VPRecipeBase *R) {
2107 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2108 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2109 }
2110
2111 static inline bool classof(const VPValue *V) {
2112 auto *R = V->getDefiningRecipe();
2113 return R && classof(R);
2114 }
2115
2116 static inline bool classof(const VPHeaderPHIRecipe *R) {
2117 return classof(static_cast<const VPRecipeBase *>(R));
2118 }
2119
2120 virtual void execute(VPTransformState &State) override = 0;
2121
2122 /// Returns the step value of the induction.
2124 const VPValue *getStepValue() const { return getOperand(1); }
2125
2126 PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
2127
2128 /// Returns the induction descriptor for the recipe.
2129 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2130
2132 // TODO: All operands of base recipe must exist and be at same index in
2133 // derived recipe.
2135 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2136 }
2137
2139 // TODO: All operands of base recipe must exist and be at same index in
2140 // derived recipe.
2142 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2143 }
2144};
2145
2146/// A recipe for handling phi nodes of integer and floating-point inductions,
2147/// producing their vector values.
2149 TruncInst *Trunc;
2150
2151public:
2153 VPValue *VF, const InductionDescriptor &IndDesc,
2154 DebugLoc DL)
2155 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2156 Step, IndDesc, DL),
2157 Trunc(nullptr) {
2158 addOperand(VF);
2159 }
2160
2162 VPValue *VF, const InductionDescriptor &IndDesc,
2163 TruncInst *Trunc, DebugLoc DL)
2164 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2165 Step, IndDesc, DL),
2166 Trunc(Trunc) {
2167 addOperand(VF);
2168 }
2169
2171
2176 }
2177
2178 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2179
2180 /// Generate the vectorized and scalarized versions of the phi node as
2181 /// needed by their users.
2182 void execute(VPTransformState &State) override;
2183
2184#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2185 /// Print the recipe.
2186 void print(raw_ostream &O, const Twine &Indent,
2187 VPSlotTracker &SlotTracker) const override;
2188#endif
2189
2191 const VPValue *getVFValue() const { return getOperand(2); }
2192
2194 // If the recipe has been unrolled (4 operands), return the VPValue for the
2195 // induction increment.
2196 return getNumOperands() == 5 ? getOperand(3) : nullptr;
2197 }
2198
2199 /// Returns the first defined value as TruncInst, if it is one or nullptr
2200 /// otherwise.
2201 TruncInst *getTruncInst() { return Trunc; }
2202 const TruncInst *getTruncInst() const { return Trunc; }
2203
2204 /// Returns true if the induction is canonical, i.e. starting at 0 and
2205 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2206 /// same type as the canonical induction.
2207 bool isCanonical() const;
2208
2209 /// Returns the scalar type of the induction.
2211 return Trunc ? Trunc->getType() : getPHINode()->getType();
2212 }
2213
2214 /// Returns the VPValue representing the value of this induction at
2215 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2216 /// take place.
2218 return getNumOperands() == 5 ? getOperand(4) : this;
2219 }
2220};
2221
2223 public VPUnrollPartAccessor<3> {
2224 bool IsScalarAfterVectorization;
2225
2226public:
2227 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2228 /// Start.
2230 const InductionDescriptor &IndDesc,
2231 bool IsScalarAfterVectorization, DebugLoc DL)
2232 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2233 Step, IndDesc, DL),
2234 IsScalarAfterVectorization(IsScalarAfterVectorization) {}
2235
2237
2240 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2241 getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
2242 }
2243
2244 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2245
2246 /// Generate vector values for the pointer induction.
2247 void execute(VPTransformState &State) override;
2248
2249 /// Returns true if only scalar values will be generated.
2250 bool onlyScalarsGenerated(bool IsScalable);
2251
2252 /// Returns the VPValue representing the value of this induction at
2253 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2254 /// take place.
2256 return getUnrollPart(*this) == 0 ? this : getOperand(2);
2257 }
2258
2259#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2260 /// Print the recipe.
2261 void print(raw_ostream &O, const Twine &Indent,
2262 VPSlotTracker &SlotTracker) const override;
2263#endif
2264};
2265
2266/// Recipe to generate a scalar PHI. Used to generate code for recipes that
2267/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
2268/// VPEVLBasedIVPHIRecipe.
2270 std::string Name;
2271
2272public:
2273 VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
2274 StringRef Name)
2275 : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
2276 Name(Name.str()) {
2277 addOperand(BackedgeValue);
2278 }
2279
2280 ~VPScalarPHIRecipe() override = default;
2281
2283 llvm_unreachable("cloning not implemented yet");
2284 }
2285
2286 VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
2287
2288 /// Generate the phi/select nodes.
2289 void execute(VPTransformState &State) override;
2290
2291 /// Returns true if the recipe only uses the first lane of operand \p Op.
2292 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2294 "Op must be an operand of the recipe");
2295 return true;
2296 }
2297
2298#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2299 /// Print the recipe.
2300 void print(raw_ostream &O, const Twine &Indent,
2301 VPSlotTracker &SlotTracker) const override;
2302#endif
2303};
2304
2305/// A recipe for handling phis that are widened in the vector loop.
2306/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
2307/// managed in the recipe directly.
2309 /// List of incoming blocks. Only used in the VPlan native path.
2310 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
2311
2312public:
2313 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start and
2314 /// debug location \p DL.
2315 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr, DebugLoc DL = {})
2316 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi, DL) {
2317 if (Start)
2318 addOperand(Start);
2319 }
2320
2322 llvm_unreachable("cloning not implemented yet");
2323 }
2324
2325 ~VPWidenPHIRecipe() override = default;
2326
2327 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2328
2329 /// Generate the phi/select nodes.
2330 void execute(VPTransformState &State) override;
2331
2332#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2333 /// Print the recipe.
2334 void print(raw_ostream &O, const Twine &Indent,
2335 VPSlotTracker &SlotTracker) const override;
2336#endif
2337
2338 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
2339 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
2340 addOperand(IncomingV);
2341 IncomingBlocks.push_back(IncomingBlock);
2342 }
2343
2344 /// Returns the \p I th incoming VPBasicBlock.
2345 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
2346
2347 /// Returns the \p I th incoming VPValue.
2348 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
2349};
2350
2351/// A recipe for handling first-order recurrence phis. The start value is the
2352/// first operand of the recipe and the incoming value from the backedge is the
2353/// second operand.
2356 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2357
2358 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2359
2361 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
2362 }
2363
2366 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2367 }
2368
2369 void execute(VPTransformState &State) override;
2370
2371 /// Return the cost of this first-order recurrence phi recipe.
2373 VPCostContext &Ctx) const override;
2374
2375#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2376 /// Print the recipe.
2377 void print(raw_ostream &O, const Twine &Indent,
2378 VPSlotTracker &SlotTracker) const override;
2379#endif
2380};
2381
2382/// A recipe for handling reduction phis. The start value is the first operand
2383/// of the recipe and the incoming value from the backedge is the second
2384/// operand.
2386 public VPUnrollPartAccessor<2> {
2387 /// Descriptor for the reduction.
2388 const RecurrenceDescriptor &RdxDesc;
2389
2390 /// The phi is part of an in-loop reduction.
2391 bool IsInLoop;
2392
2393 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2394 bool IsOrdered;
2395
2396 /// When expanding the reduction PHI, the plan's VF element count is divided
2397 /// by this factor to form the reduction phi's VF.
2398 unsigned VFScaleFactor = 1;
2399
2400public:
2401 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2402 /// RdxDesc.
2404 VPValue &Start, bool IsInLoop = false,
2405 bool IsOrdered = false, unsigned VFScaleFactor = 1)
2406 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2407 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
2408 VFScaleFactor(VFScaleFactor) {
2409 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2410 }
2411
2412 ~VPReductionPHIRecipe() override = default;
2413
2415 auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
2416 RdxDesc, *getOperand(0), IsInLoop,
2417 IsOrdered, VFScaleFactor);
2418 R->addOperand(getBackedgeValue());
2419 return R;
2420 }
2421
2422 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2423
2425 return R->getVPDefID() == VPDef::VPReductionPHISC;
2426 }
2427
2428 /// Generate the phi/select nodes.
2429 void execute(VPTransformState &State) override;
2430
2431#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2432 /// Print the recipe.
2433 void print(raw_ostream &O, const Twine &Indent,
2434 VPSlotTracker &SlotTracker) const override;
2435#endif
2436
2438 return RdxDesc;
2439 }
2440
2441 /// Returns true, if the phi is part of an ordered reduction.
2442 bool isOrdered() const { return IsOrdered; }
2443
2444 /// Returns true, if the phi is part of an in-loop reduction.
2445 bool isInLoop() const { return IsInLoop; }
2446};
2447
2448/// A recipe for forming partial reductions. In the loop, an accumulator and
2449/// vector operand are added together and passed to the next iteration as the
2450/// next accumulator. After the loop body, the accumulator is reduced to a
2451/// scalar value.
2453 unsigned Opcode;
2454
2455public:
2457 VPValue *Op1)
2458 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1,
2459 ReductionInst) {}
2460 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2461 Instruction *ReductionInst = nullptr)
2462 : VPSingleDefRecipe(VPDef::VPPartialReductionSC,
2463 ArrayRef<VPValue *>({Op0, Op1}), ReductionInst),
2464 Opcode(Opcode) {
2465 [[maybe_unused]] auto *AccumulatorRecipe =
2467 assert((isa<VPReductionPHIRecipe>(AccumulatorRecipe) ||
2468 isa<VPPartialReductionRecipe>(AccumulatorRecipe)) &&
2469 "Unexpected operand order for partial reduction recipe");
2470 }
2471 ~VPPartialReductionRecipe() override = default;
2472
2474 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1),
2476 }
2477
2478 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2479
2480 /// Generate the reduction in the loop.
2481 void execute(VPTransformState &State) override;
2482
2483 /// Return the cost of this VPPartialReductionRecipe.
2485 VPCostContext &Ctx) const override;
2486
2487 /// Get the binary op's opcode.
2488 unsigned getOpcode() const { return Opcode; }
2489
2490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2491 /// Print the recipe.
2492 void print(raw_ostream &O, const Twine &Indent,
2493 VPSlotTracker &SlotTracker) const override;
2494#endif
2495};
2496
2497/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2498/// instructions.
2500public:
2501 /// The blend operation is a User of the incoming values and of their
2502 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2503 /// be omitted (implied by passing an odd number of operands) in which case
2504 /// all other incoming values are merged into it.
2506 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2507 assert(Operands.size() > 0 && "Expected at least one operand!");
2508 }
2509
2510 VPBlendRecipe *clone() override {
2512 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2513 }
2514
2515 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2516
2517 /// A normalized blend is one that has an odd number of operands, whereby the
2518 /// first operand does not have an associated mask.
2519 bool isNormalized() const { return getNumOperands() % 2; }
2520
2521 /// Return the number of incoming values, taking into account when normalized
2522 /// the first incoming value will have no mask.
2523 unsigned getNumIncomingValues() const {
2524 return (getNumOperands() + isNormalized()) / 2;
2525 }
2526
2527 /// Return incoming value number \p Idx.
2528 VPValue *getIncomingValue(unsigned Idx) const {
2529 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2530 }
2531
2532 /// Return mask number \p Idx.
2533 VPValue *getMask(unsigned Idx) const {
2534 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2535 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2536 }
2537
2538 /// Generate the phi/select nodes.
2539 void execute(VPTransformState &State) override;
2540
2541 /// Return the cost of this VPWidenMemoryRecipe.
2543 VPCostContext &Ctx) const override;
2544
2545#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2546 /// Print the recipe.
2547 void print(raw_ostream &O, const Twine &Indent,
2548 VPSlotTracker &SlotTracker) const override;
2549#endif
2550
2551 /// Returns true if the recipe only uses the first lane of operand \p Op.
2552 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2554 "Op must be an operand of the recipe");
2555 // Recursing through Blend recipes only, must terminate at header phi's the
2556 // latest.
2557 return all_of(users(),
2558 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2559 }
2560};
2561
2562/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2563/// or stores into one wide load/store and shuffles. The first operand of a
2564/// VPInterleave recipe is the address, followed by the stored values, followed
2565/// by an optional mask.
2568
2569 /// Indicates if the interleave group is in a conditional block and requires a
2570 /// mask.
2571 bool HasMask = false;
2572
2573 /// Indicates if gaps between members of the group need to be masked out or if
2574 /// unusued gaps can be loaded speculatively.
2575 bool NeedsMaskForGaps = false;
2576
2577public:
2579 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2580 bool NeedsMaskForGaps)
2581 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2582 NeedsMaskForGaps(NeedsMaskForGaps) {
2583 for (unsigned i = 0; i < IG->getFactor(); ++i)
2584 if (Instruction *I = IG->getMember(i)) {
2585 if (I->getType()->isVoidTy())
2586 continue;
2587 new VPValue(I, this);
2588 }
2589
2590 for (auto *SV : StoredValues)
2591 addOperand(SV);
2592 if (Mask) {
2593 HasMask = true;
2594 addOperand(Mask);
2595 }
2596 }
2597 ~VPInterleaveRecipe() override = default;
2598
2600 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2601 NeedsMaskForGaps);
2602 }
2603
2604 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2605
2606 /// Return the address accessed by this recipe.
2607 VPValue *getAddr() const {
2608 return getOperand(0); // Address is the 1st, mandatory operand.
2609 }
2610
2611 /// Return the mask used by this recipe. Note that a full mask is represented
2612 /// by a nullptr.
2613 VPValue *getMask() const {
2614 // Mask is optional and therefore the last, currently 2nd operand.
2615 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2616 }
2617
2618 /// Return the VPValues stored by this interleave group. If it is a load
2619 /// interleave group, return an empty ArrayRef.
2621 // The first operand is the address, followed by the stored values, followed
2622 // by an optional mask.
2625 }
2626
2627 /// Generate the wide load or store, and shuffles.
2628 void execute(VPTransformState &State) override;
2629
2630 /// Return the cost of this VPInterleaveRecipe.
2632 VPCostContext &Ctx) const override;
2633
2634#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2635 /// Print the recipe.
2636 void print(raw_ostream &O, const Twine &Indent,
2637 VPSlotTracker &SlotTracker) const override;
2638#endif
2639
2641
2642 /// Returns the number of stored operands of this interleave group. Returns 0
2643 /// for load interleave groups.
2644 unsigned getNumStoreOperands() const {
2645 return getNumOperands() - (HasMask ? 2 : 1);
2646 }
2647
2648 /// The recipe only uses the first lane of the address.
2649 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2651 "Op must be an operand of the recipe");
2652 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2653 }
2654
2655 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2656};
2657
2658/// A recipe to represent inloop reduction operations, performing a reduction on
2659/// a vector operand into a scalar value, and adding the result to a chain.
2660/// The Operands are {ChainOp, VecOp, [Condition]}.
2662 /// The recurrence decriptor for the reduction in question.
2663 const RecurrenceDescriptor &RdxDesc;
2664 bool IsOrdered;
2665 /// Whether the reduction is conditional.
2666 bool IsConditional = false;
2667
2668protected:
2669 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2671 VPValue *CondOp, bool IsOrdered, DebugLoc DL)
2672 : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
2673 IsOrdered(IsOrdered) {
2674 if (CondOp) {
2675 IsConditional = true;
2676 addOperand(CondOp);
2677 }
2678 }
2679
2680public:
2682 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2683 bool IsOrdered, DebugLoc DL = {})
2684 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2685 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2686 IsOrdered, DL) {}
2687
2688 ~VPReductionRecipe() override = default;
2689
2691 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2692 getVecOp(), getCondOp(), IsOrdered,
2693 getDebugLoc());
2694 }
2695
2696 static inline bool classof(const VPRecipeBase *R) {
2697 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2698 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2699 }
2700
2701 static inline bool classof(const VPUser *U) {
2702 auto *R = dyn_cast<VPRecipeBase>(U);
2703 return R && classof(R);
2704 }
2705
2706 /// Generate the reduction in the loop.
2707 void execute(VPTransformState &State) override;
2708
2709 /// Return the cost of VPReductionRecipe.
2711 VPCostContext &Ctx) const override;
2712
2713#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2714 /// Print the recipe.
2715 void print(raw_ostream &O, const Twine &Indent,
2716 VPSlotTracker &SlotTracker) const override;
2717#endif
2718
2719 /// Return the recurrence decriptor for the in-loop reduction.
2721 return RdxDesc;
2722 }
2723 /// Return true if the in-loop reduction is ordered.
2724 bool isOrdered() const { return IsOrdered; };
2725 /// Return true if the in-loop reduction is conditional.
2726 bool isConditional() const { return IsConditional; };
2727 /// The VPValue of the scalar Chain being accumulated.
2728 VPValue *getChainOp() const { return getOperand(0); }
2729 /// The VPValue of the vector value to be reduced.
2730 VPValue *getVecOp() const { return getOperand(1); }
2731 /// The VPValue of the condition for the block.
2733 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2734 }
2735};
2736
2737/// A recipe to represent inloop reduction operations with vector-predication
2738/// intrinsics, performing a reduction on a vector operand with the explicit
2739/// vector length (EVL) into a scalar value, and adding the result to a chain.
2740/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2742public:
2745 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2747 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2748 R.isOrdered(), R.getDebugLoc()) {}
2749
2750 ~VPReductionEVLRecipe() override = default;
2751
2753 llvm_unreachable("cloning not implemented yet");
2754 }
2755
2756 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2757
2758 /// Generate the reduction in the loop
2759 void execute(VPTransformState &State) override;
2760
2761#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2762 /// Print the recipe.
2763 void print(raw_ostream &O, const Twine &Indent,
2764 VPSlotTracker &SlotTracker) const override;
2765#endif
2766
2767 /// The VPValue of the explicit vector length.
2768 VPValue *getEVL() const { return getOperand(2); }
2769
2770 /// Returns true if the recipe only uses the first lane of operand \p Op.
2771 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2773 "Op must be an operand of the recipe");
2774 return Op == getEVL();
2775 }
2776};
2777
2778/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2779/// copies of the original scalar type, one per lane, instead of producing a
2780/// single copy of widened type for all lanes. If the instruction is known to be
2781/// uniform only one copy, per lane zero, will be generated.
2783 /// Indicator if only a single replica per lane is needed.
2784 bool IsUniform;
2785
2786 /// Indicator if the replicas are also predicated.
2787 bool IsPredicated;
2788
2789public:
2790 template <typename IterT>
2792 bool IsUniform, VPValue *Mask = nullptr)
2793 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2794 IsUniform(IsUniform), IsPredicated(Mask) {
2795 if (Mask)
2796 addOperand(Mask);
2797 }
2798
2799 ~VPReplicateRecipe() override = default;
2800
2802 auto *Copy =
2803 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2804 isPredicated() ? getMask() : nullptr);
2805 Copy->transferFlags(*this);
2806 return Copy;
2807 }
2808
2809 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2810
2811 /// Generate replicas of the desired Ingredient. Replicas will be generated
2812 /// for all parts and lanes unless a specific part and lane are specified in
2813 /// the \p State.
2814 void execute(VPTransformState &State) override;
2815
2816 /// Return the cost of this VPReplicateRecipe.
2818 VPCostContext &Ctx) const override;
2819
2820#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2821 /// Print the recipe.
2822 void print(raw_ostream &O, const Twine &Indent,
2823 VPSlotTracker &SlotTracker) const override;
2824#endif
2825
2826 bool isUniform() const { return IsUniform; }
2827
2828 bool isPredicated() const { return IsPredicated; }
2829
2830 /// Returns true if the recipe only uses the first lane of operand \p Op.
2831 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2833 "Op must be an operand of the recipe");
2834 return isUniform();
2835 }
2836
2837 /// Returns true if the recipe uses scalars of operand \p Op.
2838 bool usesScalars(const VPValue *Op) const override {
2840 "Op must be an operand of the recipe");
2841 return true;
2842 }
2843
2844 /// Returns true if the recipe is used by a widened recipe via an intervening
2845 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2846 /// in a vector.
2847 bool shouldPack() const;
2848
2849 /// Return the mask of a predicated VPReplicateRecipe.
2851 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2852 return getOperand(getNumOperands() - 1);
2853 }
2854
2855 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2856};
2857
2858/// A recipe for generating conditional branches on the bits of a mask.
2860public:
2862 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2863 if (BlockInMask) // nullptr means all-one mask.
2864 addOperand(BlockInMask);
2865 }
2866
2868 return new VPBranchOnMaskRecipe(getOperand(0));
2869 }
2870
2871 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2872
2873 /// Generate the extraction of the appropriate bit from the block mask and the
2874 /// conditional branch.
2875 void execute(VPTransformState &State) override;
2876
2877 /// Return the cost of this VPBranchOnMaskRecipe.
2879 VPCostContext &Ctx) const override;
2880
2881#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2882 /// Print the recipe.
2883 void print(raw_ostream &O, const Twine &Indent,
2884 VPSlotTracker &SlotTracker) const override {
2885 O << Indent << "BRANCH-ON-MASK ";
2886 if (VPValue *Mask = getMask())
2887 Mask->printAsOperand(O, SlotTracker);
2888 else
2889 O << " All-One";
2890 }
2891#endif
2892
2893 /// Return the mask used by this recipe. Note that a full mask is represented
2894 /// by a nullptr.
2895 VPValue *getMask() const {
2896 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2897 // Mask is optional.
2898 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2899 }
2900
2901 /// Returns true if the recipe uses scalars of operand \p Op.
2902 bool usesScalars(const VPValue *Op) const override {
2904 "Op must be an operand of the recipe");
2905 return true;
2906 }
2907};
2908
2909/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2910/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2911/// order to merge values that are set under such a branch and feed their uses.
2912/// The phi nodes can be scalar or vector depending on the users of the value.
2913/// This recipe works in concert with VPBranchOnMaskRecipe.
2915public:
2916 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs a phi
2917 /// nodes after merging back from a Branch-on-Mask.
2919 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
2920 ~VPPredInstPHIRecipe() override = default;
2921
2923 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
2924 }
2925
2926 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2927
2928 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2929 /// retain SSA form.
2930 void execute(VPTransformState &State) override;
2931
2932 /// Return the cost of this VPPredInstPHIRecipe.
2934 VPCostContext &Ctx) const override {
2935 // TODO: Compute accurate cost after retiring the legacy cost model.
2936 return 0;
2937 }
2938
2939#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2940 /// Print the recipe.
2941 void print(raw_ostream &O, const Twine &Indent,
2942 VPSlotTracker &SlotTracker) const override;
2943#endif
2944
2945 /// Returns true if the recipe uses scalars of operand \p Op.
2946 bool usesScalars(const VPValue *Op) const override {
2948 "Op must be an operand of the recipe");
2949 return true;
2950 }
2951};
2952
2953/// A common base class for widening memory operations. An optional mask can be
2954/// provided as the last operand.
2956protected:
2958
2959 /// Whether the accessed addresses are consecutive.
2961
2962 /// Whether the consecutive accessed addresses are in reverse order.
2964
2965 /// Whether the memory access is masked.
2966 bool IsMasked = false;
2967
2968 void setMask(VPValue *Mask) {
2969 assert(!IsMasked && "cannot re-set mask");
2970 if (!Mask)
2971 return;
2972 addOperand(Mask);
2973 IsMasked = true;
2974 }
2975
2976 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2977 std::initializer_list<VPValue *> Operands,
2978 bool Consecutive, bool Reverse, DebugLoc DL)
2980 Reverse(Reverse) {
2981 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2982 }
2983
2984public:
2986 llvm_unreachable("cloning not supported");
2987 }
2988
2989 static inline bool classof(const VPRecipeBase *R) {
2990 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2991 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2992 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2993 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2994 }
2995
2996 static inline bool classof(const VPUser *U) {
2997 auto *R = dyn_cast<VPRecipeBase>(U);
2998 return R && classof(R);
2999 }
3000
3001 /// Return whether the loaded-from / stored-to addresses are consecutive.
3002 bool isConsecutive() const { return Consecutive; }
3003
3004 /// Return whether the consecutive loaded/stored addresses are in reverse
3005 /// order.
3006 bool isReverse() const { return Reverse; }
3007
3008 /// Return the address accessed by this recipe.
3009 VPValue *getAddr() const { return getOperand(0); }
3010
3011 /// Returns true if the recipe is masked.
3012 bool isMasked() const { return IsMasked; }
3013
3014 /// Return the mask used by this recipe. Note that a full mask is represented
3015 /// by a nullptr.
3016 VPValue *getMask() const {
3017 // Mask is optional and therefore the last operand.
3018 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3019 }
3020
3021 /// Generate the wide load/store.
3022 void execute(VPTransformState &State) override {
3023 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3024 }
3025
3026 /// Return the cost of this VPWidenMemoryRecipe.
3028 VPCostContext &Ctx) const override;
3029
3031};
3032
3033/// A recipe for widening load operations, using the address to load from and an
3034/// optional mask.
3035struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
3037 bool Consecutive, bool Reverse, DebugLoc DL)
3038 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3039 Reverse, DL),
3040 VPValue(this, &Load) {
3041 setMask(Mask);
3042 }
3043
3045 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
3047 getDebugLoc());
3048 }
3049
3050 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3051
3052 /// Generate a wide load or gather.
3053 void execute(VPTransformState &State) override;
3054
3055#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3056 /// Print the recipe.
3057 void print(raw_ostream &O, const Twine &Indent,
3058 VPSlotTracker &SlotTracker) const override;
3059#endif
3060
3061 /// Returns true if the recipe only uses the first lane of operand \p Op.
3062 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3064 "Op must be an operand of the recipe");
3065 // Widened, consecutive loads operations only demand the first lane of
3066 // their address.
3067 return Op == getAddr() && isConsecutive();
3068 }
3069};
3070
3071/// A recipe for widening load operations with vector-predication intrinsics,
3072/// using the address to load from, the explicit vector length and an optional
3073/// mask.
3074struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3076 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3077 {L.getAddr(), &EVL}, L.isConsecutive(),
3078 L.isReverse(), L.getDebugLoc()),
3079 VPValue(this, &getIngredient()) {
3080 setMask(Mask);
3081 }
3082
3083 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3084
3085 /// Return the EVL operand.
3086 VPValue *getEVL() const { return getOperand(1); }
3087
3088 /// Generate the wide load or gather.
3089 void execute(VPTransformState &State) override;
3090
3091 /// Return the cost of this VPWidenLoadEVLRecipe.
3093 VPCostContext &Ctx) const override;
3094
3095#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3096 /// Print the recipe.
3097 void print(raw_ostream &O, const Twine &Indent,
3098 VPSlotTracker &SlotTracker) const override;
3099#endif
3100
3101 /// Returns true if the recipe only uses the first lane of operand \p Op.
3102 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3104 "Op must be an operand of the recipe");
3105 // Widened loads only demand the first lane of EVL and consecutive loads
3106 // only demand the first lane of their address.
3107 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3108 }
3109};
3110
3111/// A recipe for widening store operations, using the stored value, the address
3112/// to store to and an optional mask.
3115 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
3116 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3118 setMask(Mask);
3119 }
3120
3122 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3124 Reverse, getDebugLoc());
3125 }
3126
3127 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3128
3129 /// Return the value stored by this recipe.
3130 VPValue *getStoredValue() const { return getOperand(1); }
3131
3132 /// Generate a wide store or scatter.
3133 void execute(VPTransformState &State) override;
3134
3135#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3136 /// Print the recipe.
3137 void print(raw_ostream &O, const Twine &Indent,
3138 VPSlotTracker &SlotTracker) const override;
3139#endif
3140
3141 /// Returns true if the recipe only uses the first lane of operand \p Op.
3142 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3144 "Op must be an operand of the recipe");
3145 // Widened, consecutive stores only demand the first lane of their address,
3146 // unless the same operand is also stored.
3147 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3148 }
3149};
3150
3151/// A recipe for widening store operations with vector-predication intrinsics,
3152/// using the value to store, the address to store to, the explicit vector
3153/// length and an optional mask.
3156 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3157 {S.getAddr(), S.getStoredValue(), &EVL},
3158 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
3159 setMask(Mask);
3160 }
3161
3162 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3163
3164 /// Return the address accessed by this recipe.
3165 VPValue *getStoredValue() const { return getOperand(1); }
3166
3167 /// Return the EVL operand.
3168 VPValue *getEVL() const { return getOperand(2); }
3169
3170 /// Generate the wide store or scatter.
3171 void execute(VPTransformState &State) override;
3172
3173 /// Return the cost of this VPWidenStoreEVLRecipe.
3175 VPCostContext &Ctx) const override;
3176
3177#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3178 /// Print the recipe.
3179 void print(raw_ostream &O, const Twine &Indent,
3180 VPSlotTracker &SlotTracker) const override;
3181#endif
3182
3183 /// Returns true if the recipe only uses the first lane of operand \p Op.
3184 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3186 "Op must be an operand of the recipe");
3187 if (Op == getEVL()) {
3188 assert(getStoredValue() != Op && "unexpected store of EVL");
3189 return true;
3190 }
3191 // Widened, consecutive memory operations only demand the first lane of
3192 // their address, unless the same operand is also stored. That latter can
3193 // happen with opaque pointers.
3194 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3195 }
3196};
3197
3198/// Recipe to expand a SCEV expression.
3200 const SCEV *Expr;
3201 ScalarEvolution &SE;
3202
3203public:
3205 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3206
3207 ~VPExpandSCEVRecipe() override = default;
3208
3210 return new VPExpandSCEVRecipe(Expr, SE);
3211 }
3212
3213 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3214
3215 /// Generate a canonical vector induction variable of the vector loop, with
3216 void execute(VPTransformState &State) override;
3217
3218 /// Return the cost of this VPExpandSCEVRecipe.
3220 VPCostContext &Ctx) const override {
3221 // TODO: Compute accurate cost after retiring the legacy cost model.
3222 return 0;
3223 }
3224
3225#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3226 /// Print the recipe.
3227 void print(raw_ostream &O, const Twine &Indent,
3228 VPSlotTracker &SlotTracker) const override;
3229#endif
3230
3231 const SCEV *getSCEV() const { return Expr; }
3232};
3233
3234/// Canonical scalar induction phi of the vector loop. Starting at the specified
3235/// start value (either 0 or the resume value when vectorizing the epilogue
3236/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3237/// canonical induction variable.
3239public:
3241 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3242
3243 ~VPCanonicalIVPHIRecipe() override = default;
3244
3246 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3247 R->addOperand(getBackedgeValue());
3248 return R;
3249 }
3250
3251 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3252
3254 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
3255 }
3256
3257 void execute(VPTransformState &State) override {
3259 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3260 }
3261
3262#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3263 /// Print the recipe.
3264 void print(raw_ostream &O, const Twine &Indent,
3265 VPSlotTracker &SlotTracker) const override;
3266#endif
3267
3268 /// Returns the scalar type of the induction.
3270 return getStartValue()->getLiveInIRValue()->getType();
3271 }
3272
3273 /// Returns true if the recipe only uses the first lane of operand \p Op.
3274 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3276 "Op must be an operand of the recipe");
3277 return true;
3278 }
3279
3280 /// Returns true if the recipe only uses the first part of operand \p Op.
3281 bool onlyFirstPartUsed(const VPValue *Op) const override {
3283 "Op must be an operand of the recipe");
3284 return true;
3285 }
3286
3287 /// Return the cost of this VPCanonicalIVPHIRecipe.
3289 VPCostContext &Ctx) const override {
3290 // For now, match the behavior of the legacy cost model.
3291 return 0;
3292 }
3293};
3294
3295/// A recipe for generating the active lane mask for the vector loop that is
3296/// used to predicate the vector operations.
3297/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3298/// remove VPActiveLaneMaskPHIRecipe.
3300public:
3302 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3303 DL) {}
3304
3305 ~VPActiveLaneMaskPHIRecipe() override = default;
3306
3309 if (getNumOperands() == 2)
3310 R->addOperand(getOperand(1));
3311 return R;
3312 }
3313
3314 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3315
3317 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
3318 }
3319
3320 /// Generate the active lane mask phi of the vector loop.
3321 void execute(VPTransformState &State) override;
3322
3323#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3324 /// Print the recipe.
3325 void print(raw_ostream &O, const Twine &Indent,
3326 VPSlotTracker &SlotTracker) const override;
3327#endif
3328};
3329
3330/// A recipe for generating the phi node for the current index of elements,
3331/// adjusted in accordance with EVL value. It starts at the start value of the
3332/// canonical induction and gets incremented by EVL in each iteration of the
3333/// vector loop.
3335public:
3337 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3338
3339 ~VPEVLBasedIVPHIRecipe() override = default;
3340
3342 llvm_unreachable("cloning not implemented yet");
3343 }
3344
3345 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3346
3348 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
3349 }
3350
3351 void execute(VPTransformState &State) override {
3353 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3354 }
3355
3356 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3358 VPCostContext &Ctx) const override {
3359 // For now, match the behavior of the legacy cost model.
3360 return 0;
3361 }
3362
3363 /// Returns true if the recipe only uses the first lane of operand \p Op.
3364 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3366 "Op must be an operand of the recipe");
3367 return true;
3368 }
3369
3370#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3371 /// Print the recipe.
3372 void print(raw_ostream &O, const Twine &Indent,
3373 VPSlotTracker &SlotTracker) const override;
3374#endif
3375};
3376
3377/// A Recipe for widening the canonical induction variable of the vector loop.
3379 public VPUnrollPartAccessor<1> {
3380public:
3382 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3383
3384 ~VPWidenCanonicalIVRecipe() override = default;
3385
3387 return new VPWidenCanonicalIVRecipe(
3388 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3389 }
3390
3391 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3392
3393 /// Generate a canonical vector induction variable of the vector loop, with
3394 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3395 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3396 void execute(VPTransformState &State) override;
3397
3398 /// Return the cost of this VPWidenCanonicalIVPHIRecipe.
3400 VPCostContext &Ctx) const override {
3401 // TODO: Compute accurate cost after retiring the legacy cost model.
3402 return 0;
3403 }
3404
3405#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3406 /// Print the recipe.
3407 void print(raw_ostream &O, const Twine &Indent,
3408 VPSlotTracker &SlotTracker) const override;
3409#endif
3410};
3411
3412/// A recipe for converting the input value \p IV value to the corresponding
3413/// value of an IV with different start and step values, using Start + IV *
3414/// Step.
3416 /// Kind of the induction.
3418 /// If not nullptr, the floating point induction binary operator. Must be set
3419 /// for floating point inductions.
3420 const FPMathOperator *FPBinOp;
3421
3422 /// Name to use for the generated IR instruction for the derived IV.
3423 std::string Name;
3424
3425public:
3427 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3428 const Twine &Name = "")
3430 IndDesc.getKind(),
3431 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3432 Start, CanonicalIV, Step, Name) {}
3433
3435 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3436 VPValue *Step, const Twine &Name = "")
3437 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3438 FPBinOp(FPBinOp), Name(Name.str()) {}
3439
3440 ~VPDerivedIVRecipe() override = default;
3441
3443 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3444 getStepValue());
3445 }
3446
3447 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3448
3449 /// Generate the transformed value of the induction at offset StartValue (1.
3450 /// operand) + IV (2. operand) * StepValue (3, operand).
3451 void execute(VPTransformState &State) override;
3452
3453 /// Return the cost of this VPDerivedIVRecipe.
3455 VPCostContext &Ctx) const override {
3456 // TODO: Compute accurate cost after retiring the legacy cost model.
3457 return 0;
3458 }
3459
3460#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3461 /// Print the recipe.
3462 void print(raw_ostream &O, const Twine &Indent,
3463 VPSlotTracker &SlotTracker) const override;
3464#endif
3465
3467 return getStartValue()->getLiveInIRValue()->getType();
3468 }
3469
3470 VPValue *getStartValue() const { return getOperand(0); }
3471 VPValue *getStepValue() const { return getOperand(2); }
3472
3473 /// Returns true if the recipe only uses the first lane of operand \p Op.
3474 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3476 "Op must be an operand of the recipe");
3477 return true;
3478 }
3479};
3480
3481/// A recipe for handling phi nodes of integer and floating-point inductions,
3482/// producing their scalar values.
3484 public VPUnrollPartAccessor<2> {
3485 Instruction::BinaryOps InductionOpcode;
3486
3487public:
3490 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3491 ArrayRef<VPValue *>({IV, Step}), FMFs),
3492 InductionOpcode(Opcode) {}
3493
3495 VPValue *Step)
3497 IV, Step, IndDesc.getInductionOpcode(),
3498 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3499 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3500 : FastMathFlags()) {}
3501
3502 ~VPScalarIVStepsRecipe() override = default;
3503
3505 return new VPScalarIVStepsRecipe(
3506 getOperand(0), getOperand(1), InductionOpcode,
3508 }
3509
3510 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3511
3512 /// Generate the scalarized versions of the phi node as needed by their users.
3513 void execute(VPTransformState &State) override;
3514
3515 /// Return the cost of this VPScalarIVStepsRecipe.
3517 VPCostContext &Ctx) const override {
3518 // TODO: Compute accurate cost after retiring the legacy cost model.
3519 return 0;
3520 }
3521
3522#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3523 /// Print the recipe.
3524 void print(raw_ostream &O, const Twine &Indent,
3525 VPSlotTracker &SlotTracker) const override;
3526#endif
3527
3528 VPValue *getStepValue() const { return getOperand(1); }
3529
3530 /// Returns true if the recipe only uses the first lane of operand \p Op.
3531 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3533 "Op must be an operand of the recipe");
3534 return true;
3535 }
3536};
3537
3538/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3539/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3540/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3542 friend class VPlan;
3543
3544 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
3545 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3546 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3547 if (Recipe)
3548 appendRecipe(Recipe);
3549 }
3550
3551public:
3553
3554protected:
3555 /// The VPRecipes held in the order of output instructions to generate.
3557
3558 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3559 : VPBlockBase(BlockSC, Name.str()) {}
3560
3561public:
3562 ~VPBasicBlock() override {
3563 while (!Recipes.empty())
3564 Recipes.pop_back();
3565 }
3566
3567 /// Instruction iterators...
3572
3573 //===--------------------------------------------------------------------===//
3574 /// Recipe iterator methods
3575 ///
3576 inline iterator begin() { return Recipes.begin(); }
3577 inline const_iterator begin() const { return Recipes.begin(); }
3578 inline iterator end() { return Recipes.end(); }
3579 inline const_iterator end() const { return Recipes.end(); }
3580
3581 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3582 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3583 inline reverse_iterator rend() { return Recipes.rend(); }
3584 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3585
3586 inline size_t size() const { return Recipes.size(); }
3587 inline bool empty() const { return Recipes.empty(); }
3588 inline const VPRecipeBase &front() const { return Recipes.front(); }
3589 inline VPRecipeBase &front() { return Recipes.front(); }
3590 inline const VPRecipeBase &back() const { return Recipes.back(); }
3591 inline VPRecipeBase &back() { return Recipes.back(); }
3592
3593 /// Returns a reference to the list of recipes.
3595
3596 /// Returns a pointer to a member of the recipe list.
3598 return &VPBasicBlock::Recipes;
3599 }
3600
3601 /// Method to support type inquiry through isa, cast, and dyn_cast.
3602 static inline bool classof(const VPBlockBase *V) {
3603 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3604 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3605 }
3606
3607 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3608 assert(Recipe && "No recipe to append.");
3609 assert(!Recipe->Parent && "Recipe already in VPlan");
3610 Recipe->Parent = this;
3611 Recipes.insert(InsertPt, Recipe);
3612 }
3613
3614 /// Augment the existing recipes of a VPBasicBlock with an additional
3615 /// \p Recipe as the last recipe.
3616 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3617
3618 /// The method which generates the output IR instructions that correspond to
3619 /// this VPBasicBlock, thereby "executing" the VPlan.
3620 void execute(VPTransformState *State) override;
3621
3622 /// Return the cost of this VPBasicBlock.
3624
3625 /// Return the position of the first non-phi node recipe in the block.
3627
3628 /// Returns an iterator range over the PHI-like recipes in the block.
3630 return make_range(begin(), getFirstNonPhi());
3631 }
3632
3633 /// Split current block at \p SplitAt by inserting a new block between the
3634 /// current block and its successors and moving all recipes starting at
3635 /// SplitAt to the new block. Returns the new block.
3636 VPBasicBlock *splitAt(iterator SplitAt);
3637
3640
3641#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3642 /// Print this VPBsicBlock to \p O, prefixing all lines with \p Indent. \p
3643 /// SlotTracker is used to print unnamed VPValue's using consequtive numbers.
3644 ///
3645 /// Note that the numbering is applied to the whole VPlan, so printing
3646 /// individual blocks is consistent with the whole VPlan printing.
3647 void print(raw_ostream &O, const Twine &Indent,
3648 VPSlotTracker &SlotTracker) const override;
3649 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3650#endif
3651
3652 /// If the block has multiple successors, return the branch recipe terminating
3653 /// the block. If there are no or only a single successor, return nullptr;
3655 const VPRecipeBase *getTerminator() const;
3656
3657 /// Returns true if the block is exiting it's parent region.
3658 bool isExiting() const;
3659
3660 /// Clone the current block and it's recipes, without updating the operands of
3661 /// the cloned recipes.
3662 VPBasicBlock *clone() override;
3663
3664protected:
3665 /// Execute the recipes in the IR basic block \p BB.
3666 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3667
3668 /// Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block
3669 /// generated for this VPBB.
3671
3672private:
3673 /// Create an IR BasicBlock to hold the output instructions generated by this
3674 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3675 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3676};
3677
3678/// A special type of VPBasicBlock that wraps an existing IR basic block.
3679/// Recipes of the block get added before the first non-phi instruction in the
3680/// wrapped block.
3681/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3682/// preheader block.
3684 friend class VPlan;
3685
3686 BasicBlock *IRBB;
3687
3688 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
3690 : VPBasicBlock(VPIRBasicBlockSC,
3691 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3692 IRBB(IRBB) {}
3693
3694public:
3695 ~VPIRBasicBlock() override {}
3696
3697 static inline bool classof(const VPBlockBase *V) {
3698 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3699 }
3700
3701 /// The method which generates the output IR instructions that correspond to
3702 /// this VPBasicBlock, thereby "executing" the VPlan.
3703 void execute(VPTransformState *State) override;
3704
3705 VPIRBasicBlock *clone() override;
3706
3707 BasicBlock *getIRBasicBlock() const { return IRBB; }
3708};
3709
3710/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3711/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3712/// A VPRegionBlock may indicate that its contents are to be replicated several
3713/// times. This is designed to support predicated scalarization, in which a
3714/// scalar if-then code structure needs to be generated VF * UF times. Having
3715/// this replication indicator helps to keep a single model for multiple
3716/// candidate VF's. The actual replication takes place only once the desired VF
3717/// and UF have been determined.
3719 friend class VPlan;
3720
3721 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3722 VPBlockBase *Entry;
3723
3724 /// Hold the Single Exiting block of the SESE region modelled by the
3725 /// VPRegionBlock.
3726 VPBlockBase *Exiting;
3727
3728 /// An indicator whether this region is to generate multiple replicated
3729 /// instances of output IR corresponding to its VPBlockBases.
3730 bool IsReplicator;
3731
3732 /// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
3733 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
3734 const std::string &Name = "", bool IsReplicator = false)
3735 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3736 IsReplicator(IsReplicator) {
3737 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3738 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3739 Entry->setParent(this);
3740 Exiting->setParent(this);
3741 }
3742 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3743 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3744 IsReplicator(IsReplicator) {}
3745
3746public:
3747 ~VPRegionBlock() override {}
3748
3749 /// Method to support type inquiry through isa, cast, and dyn_cast.
3750 static inline bool classof(const VPBlockBase *V) {
3751 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3752 }
3753
3754 const VPBlockBase *getEntry() const { return Entry; }
3755 VPBlockBase *getEntry() { return Entry; }
3756
3757 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3758 /// EntryBlock must have no predecessors.
3759 void setEntry(VPBlockBase *EntryBlock) {
3760 assert(EntryBlock->getPredecessors().empty() &&
3761 "Entry block cannot have predecessors.");
3762 Entry = EntryBlock;
3763 EntryBlock->setParent(this);
3764 }
3765
3766 const VPBlockBase *getExiting() const { return Exiting; }
3767 VPBlockBase *getExiting() { return Exiting; }
3768
3769 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3770 /// ExitingBlock must have no successors.
3771 void setExiting(VPBlockBase *ExitingBlock) {
3772 assert(ExitingBlock->getSuccessors().empty() &&
3773 "Exit block cannot have successors.");
3774 Exiting = ExitingBlock;
3775 ExitingBlock->setParent(this);
3776 }
3777
3778 /// Returns the pre-header VPBasicBlock of the loop region.
3780 assert(!isReplicator() && "should only get pre-header of loop regions");
3782 }
3783
3784 /// An indicator whether this region is to generate multiple replicated
3785 /// instances of output IR corresponding to its VPBlockBases.
3786 bool isReplicator() const { return IsReplicator; }
3787
3788 /// The method which generates the output IR instructions that correspond to
3789 /// this VPRegionBlock, thereby "executing" the VPlan.
3790 void execute(VPTransformState *State) override;
3791
3792 // Return the cost of this region.
3794
3795#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3796 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3797 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3798 /// consequtive numbers.
3799 ///
3800 /// Note that the numbering is applied to the whole VPlan, so printing
3801 /// individual regions is consistent with the whole VPlan printing.
3802 void print(raw_ostream &O, const Twine &Indent,
3803 VPSlotTracker &SlotTracker) const override;
3804 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3805#endif
3806
3807 /// Clone all blocks in the single-entry single-exit region of the block and
3808 /// their recipes without updating the operands of the cloned recipes.
3809 VPRegionBlock *clone() override;
3810};
3811
3812/// VPlan models a candidate for vectorization, encoding various decisions take
3813/// to produce efficient output IR, including which branches, basic-blocks and
3814/// output IR instructions to generate, and their cost. VPlan holds a
3815/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3816/// VPBasicBlock.
3817class VPlan {
3818 friend class VPlanPrinter;
3819 friend class VPSlotTracker;
3820
3821 /// VPBasicBlock corresponding to the original preheader. Used to place
3822 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3823 /// rest of VPlan execution.
3824 /// When this VPlan is used for the epilogue vector loop, the entry will be
3825 /// replaced by a new entry block created during skeleton creation.
3826 VPBasicBlock *Entry;
3827
3828 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3829 VPIRBasicBlock *ScalarHeader;
3830
3831 /// Holds the VFs applicable to this VPlan.
3833
3834 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3835 /// any UF.
3837
3838 /// Holds the name of the VPlan, for printing.
3839 std::string Name;
3840
3841 /// Represents the trip count of the original loop, for folding
3842 /// the tail.
3843 VPValue *TripCount = nullptr;
3844
3845 /// Represents the backedge taken count of the original loop, for folding
3846 /// the tail. It equals TripCount - 1.
3847 VPValue *BackedgeTakenCount = nullptr;
3848
3849 /// Represents the vector trip count.
3850 VPValue VectorTripCount;
3851
3852 /// Represents the vectorization factor of the loop.
3853 VPValue VF;
3854
3855 /// Represents the loop-invariant VF * UF of the vector loop region.
3856 VPValue VFxUF;
3857
3858 /// Holds a mapping between Values and their corresponding VPValue inside
3859 /// VPlan.
3860 Value2VPValueTy Value2VPValue;
3861
3862 /// Contains all the external definitions created for this VPlan. External
3863 /// definitions are VPValues that hold a pointer to their underlying IR.
3864 SmallVector<VPValue *, 16> VPLiveInsToFree;
3865
3866 /// Mapping from SCEVs to the VPValues representing their expansions.
3867 /// NOTE: This mapping is temporary and will be removed once all users have
3868 /// been modeled in VPlan directly.
3869 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3870
3871 /// Blocks allocated and owned by the VPlan. They will be deleted once the
3872 /// VPlan is destroyed.
3873 SmallVector<VPBlockBase *> CreatedBlocks;
3874
3875 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3876 /// wrapping the original header of the scalar loop.
3877 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3878 : Entry(Entry), ScalarHeader(ScalarHeader) {
3879 Entry->setPlan(this);
3880 assert(ScalarHeader->getNumSuccessors() == 0 &&
3881 "scalar header must be a leaf node");
3882 }
3883
3884public:
3885 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
3886 /// original preheader and scalar header of \p L, to be used as entry and
3887 /// scalar header blocks of the new VPlan.
3888 VPlan(Loop *L);
3889
3890 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
3891 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
3892 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
3893 setEntry(createVPBasicBlock("preheader"));
3894 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
3895 TripCount = TC;
3896 }
3897
3898 ~VPlan();
3899
3901 Entry = VPBB;
3902 VPBB->setPlan(this);
3903 }
3904
3905 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3906 /// original scalar pre-header) which contains SCEV expansions that need
3907 /// to happen before the CFG is modified (when executing a VPlan for the
3908 /// epilogue vector loop, the original entry needs to be replaced by a new
3909 /// one); a VPBasicBlock for the vector pre-header, followed by a region for
3910 /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
3911 /// to guard executing the scalar epilogue loop, it will be added to the
3912 /// middle block, together with VPBasicBlocks for the scalar preheader and
3913 /// exit blocks. \p InductionTy is the type of the canonical induction and
3914 /// used for related values, like the trip count expression.
3915 static VPlanPtr createInitialVPlan(Type *InductionTy,
3917 bool RequiresScalarEpilogueCheck,
3918 bool TailFolded, Loop *TheLoop);
3919
3920 /// Prepare the plan for execution, setting up the required live-in values.
3921 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3922 VPTransformState &State);
3923
3924 /// Generate the IR code for this VPlan.
3925 void execute(VPTransformState *State);
3926
3927 /// Return the cost of this plan.
3929
3930 VPBasicBlock *getEntry() { return Entry; }
3931 const VPBasicBlock *getEntry() const { return Entry; }
3932
3933 /// Returns the preheader of the vector loop region, if one exists, or null
3934 /// otherwise.
3936 VPRegionBlock *VectorRegion = getVectorLoopRegion();
3937 return VectorRegion
3938 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
3939 : nullptr;
3940 }
3941
3942 /// Returns the VPRegionBlock of the vector loop.
3944 const VPRegionBlock *getVectorLoopRegion() const;
3945
3946 /// Returns the 'middle' block of the plan, that is the block that selects
3947 /// whether to execute the scalar tail loop or the exit block from the loop
3948 /// latch.
3950 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3951 }
3953 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3954 }
3955
3956 /// Return the VPBasicBlock for the preheader of the scalar loop.
3958 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
3959 }
3960
3961 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3962 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3963
3964 /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
3965 /// of the VPlan, that is leaf nodes except the scalar header. Defined in
3966 /// VPlanHCFG, as the definition of the type needs access to the definitions
3967 /// of VPBlockShallowTraversalWrapper.
3968 auto getExitBlocks();
3969
3970 /// The trip count of the original loop.
3972 assert(TripCount && "trip count needs to be set before accessing it");
3973 return TripCount;
3974 }
3975
3976 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3977 /// the original trip count have been replaced.
3978 void resetTripCount(VPValue *NewTripCount) {
3979 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3980 "TripCount always must be set");
3981 TripCount = NewTripCount;
3982 }
3983
3984 /// The backedge taken count of the original loop.
3986 if (!BackedgeTakenCount)
3987 BackedgeTakenCount = new VPValue();
3988 return BackedgeTakenCount;
3989 }
3990
3991 /// The vector trip count.
3992 VPValue &getVectorTripCount() { return VectorTripCount; }
3993
3994 /// Returns the VF of the vector loop region.
3995 VPValue &getVF() { return VF; };
3996
3997 /// Returns VF * UF of the vector loop region.
3998 VPValue &getVFxUF() { return VFxUF; }
3999
4000 void addVF(ElementCount VF) { VFs.insert(VF); }
4001
4003 assert(hasVF(VF) && "Cannot set VF not already in plan");
4004 VFs.clear();
4005 VFs.insert(VF);
4006 }
4007
4008 bool hasVF(ElementCount VF) { return VFs.count(VF); }
4010 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4011 }
4012
4013 /// Returns an iterator range over all VFs of the plan.
4016 return {VFs.begin(), VFs.end()};
4017 }
4018
4019 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
4020
4021 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4022
4023 unsigned getUF() const {
4024 assert(UFs.size() == 1 && "Expected a single UF");
4025 return UFs[0];
4026 }
4027
4028 void setUF(unsigned UF) {
4029 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4030 UFs.clear();
4031 UFs.insert(UF);
4032 }
4033
4034 /// Return a string with the name of the plan and the applicable VFs and UFs.
4035 std::string getName() const;
4036
4037 void setName(const Twine &newName) { Name = newName.str(); }
4038
4039 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4040 /// yet) for \p V.
4042 assert(V && "Trying to get or add the VPValue of a null Value");
4043 if (!Value2VPValue.count(V)) {
4044 VPValue *VPV = new VPValue(V);
4045 VPLiveInsToFree.push_back(VPV);
4046 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4047 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
4048 Value2VPValue[V] = VPV;
4049 }
4050
4051 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
4052 assert(Value2VPValue[V]->isLiveIn() &&
4053 "Only live-ins should be in mapping");
4054 return Value2VPValue[V];
4055 }
4056
4057 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4058 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4059
4060#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4061 /// Print the live-ins of this VPlan to \p O.
4062 void printLiveIns(raw_ostream &O) const;
4063
4064 /// Print this VPlan to \p O.
4065 void print(raw_ostream &O) const;
4066
4067 /// Print this VPlan in DOT format to \p O.
4068 void printDOT(raw_ostream &O) const;
4069
4070 /// Dump the plan to stderr (for debugging).
4071 LLVM_DUMP_METHOD void dump() const;
4072#endif
4073
4074 /// Returns the canonical induction recipe of the vector loop.
4077 if (EntryVPBB->empty()) {
4078 // VPlan native path.
4079 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4080 }
4081 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4082 }
4083
4084 VPValue *getSCEVExpansion(const SCEV *S) const {
4085 return SCEVToExpansion.lookup(S);
4086 }
4087
4088 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4089 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4090 SCEVToExpansion[S] = V;
4091 }
4092
4093 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4094 /// recipes to refer to the clones, and return it.
4095 VPlan *duplicate();
4096
4097 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4098 /// present. The returned block is owned by the VPlan and deleted once the
4099 /// VPlan is destroyed.
4101 VPRecipeBase *Recipe = nullptr) {
4102 auto *VPB = new VPBasicBlock(Name, Recipe);
4103 CreatedBlocks.push_back(VPB);
4104 return VPB;
4105 }
4106
4107 /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
4108 /// IsReplicator is true, the region is a replicate region. The returned block
4109 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4111 const std::string &Name = "",
4112 bool IsReplicator = false) {
4113 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
4114 CreatedBlocks.push_back(VPB);
4115 return VPB;
4116 }
4117
4118 /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set
4119 /// to nullptr. If \p IsReplicator is true, the region is a replicate region.
4120 /// The returned block is owned by the VPlan and deleted once the VPlan is
4121 /// destroyed.
4122 VPRegionBlock *createVPRegionBlock(const std::string &Name = "",
4123 bool IsReplicator = false) {
4124 auto *VPB = new VPRegionBlock(Name, IsReplicator);
4125 CreatedBlocks.push_back(VPB);
4126 return VPB;
4127 }
4128
4129 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4130 /// VPIRInstructions wrapping the instructions in t\p IRBB. The returned
4131 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4133
4134 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4135 /// instructions in \p IRBB, except its terminator which is managed by the
4136 /// successors of the block in VPlan. The returned block is owned by the VPlan
4137 /// and deleted once the VPlan is destroyed.
4139};
4140
4141#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4142/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
4143/// indented and follows the dot format.
4145 raw_ostream &OS;
4146 const VPlan &Plan;
4147 unsigned Depth = 0;
4148 unsigned TabWidth = 2;
4149 std::string Indent;
4150 unsigned BID = 0;
4152
4154
4155 /// Handle indentation.
4156 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
4157
4158 /// Print a given \p Block of the Plan.
4159 void dumpBlock(const VPBlockBase *Block);
4160
4161 /// Print the information related to the CFG edges going out of a given
4162 /// \p Block, followed by printing the successor blocks themselves.
4163 void dumpEdges(const VPBlockBase *Block);
4164
4165 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
4166 /// its successor blocks.
4167 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
4168
4169 /// Print a given \p Region of the Plan.
4170 void dumpRegion(const VPRegionBlock *Region);
4171
4172 unsigned getOrCreateBID(const VPBlockBase *Block) {
4173 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
4174 }
4175
4176 Twine getOrCreateName(const VPBlockBase *Block);
4177
4178 Twine getUID(const VPBlockBase *Block);
4179
4180 /// Print the information related to a CFG edge between two VPBlockBases.
4181 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
4182 const Twine &Label);
4183
4184public:
4186 : OS(O), Plan(P), SlotTracker(&P) {}
4187
4188 LLVM_DUMP_METHOD void dump();
4189};
4190
4192 const Value *V;
4193
4194 VPlanIngredient(const Value *V) : V(V) {}
4195
4196 void print(raw_ostream &O) const;
4197};
4198
4200 I.print(OS);
4201 return OS;
4202}
4203
4205 Plan.print(OS);
4206 return OS;
4207}
4208#endif
4209
4212 InterleaveGroupMap;
4213
4214 /// Type for mapping of instruction based interleave groups to VPInstruction
4215 /// interleave groups
4218
4219 /// Recursively \p Region and populate VPlan based interleave groups based on
4220 /// \p IAI.
4221 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
4223 /// Recursively traverse \p Block and populate VPlan based interleave groups
4224 /// based on \p IAI.
4225 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
4227
4228public:
4230
4233 // Avoid releasing a pointer twice.
4234 for (auto &I : InterleaveGroupMap)
4235 DelSet.insert(I.second);
4236 for (auto *Ptr : DelSet)
4237 delete Ptr;
4238 }
4239
4240 /// Get the interleave group that \p Instr belongs to.
4241 ///
4242 /// \returns nullptr if doesn't have such group.
4245 return InterleaveGroupMap.lookup(Instr);
4246 }
4247};
4248
4249/// Class that maps (parts of) an existing VPlan to trees of combined
4250/// VPInstructions.
4252 enum class OpMode { Failed, Load, Opcode };
4253
4254 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
4255 /// DenseMap keys.
4256 struct BundleDenseMapInfo {
4257 static SmallVector<VPValue *, 4> getEmptyKey() {
4258 return {reinterpret_cast<VPValue *>(-1)};
4259 }
4260
4261 static SmallVector<VPValue *, 4> getTombstoneKey() {
4262 return {reinterpret_cast<VPValue *>(-2)};
4263 }
4264
4265 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
4266 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
4267 }
4268
4269 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
4271 return LHS == RHS;
4272 }
4273 };
4274
4275 /// Mapping of values in the original VPlan to a combined VPInstruction.
4277 BundleToCombined;
4278
4280
4281 /// Basic block to operate on. For now, only instructions in a single BB are
4282 /// considered.
4283 const VPBasicBlock &BB;
4284
4285 /// Indicates whether we managed to combine all visited instructions or not.
4286 bool CompletelySLP = true;
4287
4288 /// Width of the widest combined bundle in bits.
4289 unsigned WidestBundleBits = 0;
4290
4291 using MultiNodeOpTy =
4292 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
4293
4294 // Input operand bundles for the current multi node. Each multi node operand
4295 // bundle contains values not matching the multi node's opcode. They will
4296 // be reordered in reorderMultiNodeOps, once we completed building a
4297 // multi node.
4298 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
4299
4300 /// Indicates whether we are building a multi node currently.
4301 bool MultiNodeActive = false;
4302
4303 /// Check if we can vectorize Operands together.
4304 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
4305
4306 /// Add combined instruction \p New for the bundle \p Operands.
4307 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
4308
4309 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
4310 VPInstruction *markFailed();
4311
4312 /// Reorder operands in the multi node to maximize sequential memory access
4313 /// and commutative operations.
4314 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
4315
4316 /// Choose the best candidate to use for the lane after \p Last. The set of
4317 /// candidates to choose from are values with an opcode matching \p Last's
4318 /// or loads consecutive to \p Last.
4319 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
4320 SmallPtrSetImpl<VPValue *> &Candidates,
4322
4323#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4324 /// Print bundle \p Values to dbgs().
4325 void dumpBundle(ArrayRef<VPValue *> Values);
4326#endif
4327
4328public:
4329 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
4330
4331 ~VPlanSlp() = default;
4332
4333 /// Tries to build an SLP tree rooted at \p Operands and returns a
4334 /// VPInstruction combining \p Operands, if they can be combined.
4336
4337 /// Return the width of the widest combined bundle in bits.
4338 unsigned getWidestBundleBits() const { return WidestBundleBits; }
4339
4340 /// Return true if all visited instruction can be combined.
4341 bool isCompletelySLP() const { return CompletelySLP; }
4342};
4343} // end namespace llvm
4344
4345#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:410
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
Flatten the CFG
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:816
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:310
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
uint32_t getFactor() const
Definition: VectorUtils.h:504
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
InstTy * getInsertPos() const
Definition: VectorUtils.h:574
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:630
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition: ModRef.h:198
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:195
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:122
ElementCount operator*() const
Definition: VPlan.h:130
iterator & operator++()
Definition: VPlan.h:132
iterator(ElementCount VF)
Definition: VPlan.h:126
bool operator==(const iterator &Other) const
Definition: VPlan.h:128
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3299
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3307
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3316
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:3301
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3541
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3569
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3616
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3571
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3568
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition: VPlan.cpp:414
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:479
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3594
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:3558
iterator end()
Definition: VPlan.h:3578
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3576
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3570
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:516
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3629
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:757
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:208
~VPBasicBlock() override
Definition: VPlan.h:3562
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:565
const_reverse_iterator rbegin() const
Definition: VPlan.h:3582
reverse_iterator rend()
Definition: VPlan.h:3583
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:535
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:3556
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:523
VPRecipeBase & back()
Definition: VPlan.h:3591
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBsicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:630
const VPRecipeBase & front() const
Definition: VPlan.h:3588
const_iterator begin() const
Definition: VPlan.h:3577
VPRecipeBase & front()
Definition: VPlan.h:3589
bool isExiting() const
Returns true if the block is exiting it's parent region.
Definition: VPlan.cpp:613
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:601
const VPRecipeBase & back() const
Definition: VPlan.h:3590
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3607
bool empty() const
Definition: VPlan.h:3587
const_iterator end() const
Definition: VPlan.h:3579
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3602
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3597
reverse_iterator rbegin()
Definition: VPlan.h:3581
size_t size() const
Definition: VPlan.h:3586
const_reverse_iterator rend() const
Definition: VPlan.h:3584
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2499
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2505
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2552
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2528
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2533
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2523
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2510
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2519
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:392
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition: VPlan.h:608
VPRegionBlock * getParent()
Definition: VPlan.h:484
VPBlocksTy & getPredecessors()
Definition: VPlan.h:516
iterator_range< VPBlockBase ** > predecessors()
Definition: VPlan.h:513
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:667
void setName(const Twine &newName)
Definition: VPlan.h:477
size_t getNumSuccessors() const
Definition: VPlan.h:530
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:512
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:623
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:618
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:636
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPlan to O.
Definition: VPlan.h:657
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:566
size_t getNumPredecessors() const
Definition: VPlan.h:531
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:599
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:200
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:515
virtual VPBlockBase * clone()=0
Clone the current block and it's recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition: VPlan.h:469
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:153
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:172
const VPRegionBlock * getParent() const
Definition: VPlan.h:485
const std::string & getName() const
Definition: VPlan.h:475
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:618
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:556
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:590
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:526
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that correspond to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:550
void clearPredecessors()
Remove all the predecessor of this block.
Definition: VPlan.h:615
unsigned getVPBlockID() const
Definition: VPlan.h:482
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:643
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:461
VPBlocksTy & getSuccessors()
Definition: VPlan.h:510
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:192
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:579
void setParent(VPRegionBlock *P)
Definition: VPlan.h:495
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:572
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:520
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:509
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlanUtils.h:79
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2859
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2895
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2883
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2861
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2867
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2902
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
VPlan-based builder utility analogous to IRBuilder.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3238
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:3281
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3253
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3245
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:3240
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3274
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3269
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3257
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition: VPlan.h:3288
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:298
unsigned getVPDefID() const
Definition: VPlanValue.h:426
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3415
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition: VPlan.h:3454
VPValue * getStepValue() const
Definition: VPlan.h:3471
Type * getScalarType() const
Definition: VPlan.h:3466
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3442
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3434
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3474
VPValue * getStartValue() const
Definition: VPlan.h:3470
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3426
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3334
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3347
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3341
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3351
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition: VPlan.h:3357
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:3336
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3364
Recipe to expand a SCEV expression.
Definition: VPlan.h:3199
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:3204
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition: VPlan.h:3219
const SCEV * getSCEV() const
Definition: VPlan.h:3231
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3209
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:2033
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
static bool classof(const VPValue *V)
Definition: VPlan.h:2050
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:2035
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2081
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2070
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:2078
VPValue * getStartValue() const
Definition: VPlan.h:2073
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2046
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:2087
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition: VPlan.h:1783
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1795
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPHistogramRecipe(unsigned Opcode, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:1789
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1812
unsigned getOpcode() const
Definition: VPlan.h:1808
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3683
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:450
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3707
~VPIRBasicBlock() override
Definition: VPlan.h:3695
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3697
VPIRBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:472
A recipe to wrap on original IR instruction not to be modified during execution, execept for PHIs.
Definition: VPlan.h:1377
Instruction & getInstruction() const
Definition: VPlan.h:1401
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition: VPlan.h:1415
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1421
VPIRInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1388
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void extractLastLaneOfOperand(VPBuilder &Builder)
Update the recipes single operand to the last lane of the operand using Builder.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition: VPlan.h:1409
VPIRInstruction(Instruction &I)
Definition: VPlan.h:1381
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1189
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1289
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1264
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1300
bool hasResult() const
Definition: VPlan.h:1330
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1370
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition: VPlan.h:1315
unsigned getOpcode() const
Definition: VPlan.h:1307
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1276
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1269
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1207
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1195
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1210
@ CalculateTripCountMinusVF
Definition: VPlan.h:1208
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1281
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2566
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2649
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2607
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2578
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2613
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2599
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2620
Instruction * getInsertPos() const
Definition: VPlan.h:2655
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2640
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2644
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:4244
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:154
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:195
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:73
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:177
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:211
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:181
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:214
VPLane(unsigned Lane)
Definition: VPlan.h:176
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:201
static VPLane getFirstLane()
Definition: VPlan.h:179
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:157
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:217
A recipe for forming partial reductions.
Definition: VPlan.h:2452
~VPPartialReductionRecipe() override=default
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2488
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, Instruction *ReductionInst=nullptr)
Definition: VPlan.h:2460
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1)
Definition: VPlan.h:2456
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2473
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2914
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2946
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2922
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition: VPlan.h:2933
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi nodes after merging back from ...
Definition: VPlan.h:2918
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:711
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:800
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:736
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:805
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:778
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:722
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:737
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:783
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:727
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:789
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:922
ExactFlagsTy ExactFlags
Definition: VPlan.h:972
FastMathFlagsTy FMFs
Definition: VPlan.h:975
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:974
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:969
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1142
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1024
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1103
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPNoWrapFlags GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1049
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1055
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1036
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1072
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1145
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:994
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:971
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1030
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1042
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:973
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:980
WrapFlagsTy WrapFlags
Definition: VPlan.h:970
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1149
bool isDisjoint() const
Definition: VPlan.h:1161
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1136
bool hasNoSignedWrap() const
Definition: VPlan.h:1155
static bool classof(const VPUser *U)
Definition: VPlan.h:1066
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:987
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2741
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2771
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2768
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
Definition: VPlan.h:2743
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2752
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:2386
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2442
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2414
~VPReductionPHIRecipe() override=default
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:2403
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2445
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2424
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2437
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2661
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2726
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2696
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL={})
Definition: VPlan.h:2681
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition: VPlan.h:2669
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2730
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence decriptor for the in-loop reduction.
Definition: VPlan.h:2720
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2732
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2724
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2728
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2690
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2701
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3718
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:701
const VPBlockBase * getEntry() const
Definition: VPlan.h:3754
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3786
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3771
VPBlockBase * getExiting()
Definition: VPlan.h:3767
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3759
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:764
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:802
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:710
const VPBlockBase * getExiting() const
Definition: VPlan.h:3766
VPBlockBase * getEntry()
Definition: VPlan.h:3755
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3779
~VPRegionBlock() override
Definition: VPlan.h:3747
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3750
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2782
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2831
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2838
bool isUniform() const
Definition: VPlan.h:2826
bool isPredicated() const
Definition: VPlan.h:2828
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2801
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2791
unsigned getOpcode() const
Definition: VPlan.h:2855
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2850
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
A recipe to compute the pointers for widened memory accesses of IndexTy in reverse order.
Definition: VPlan.h:1910
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition: VPlan.h:1934
VPReverseVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1948
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1941
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1927
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1914
const VPValue * getVFValue() const
Definition: VPlan.h:1923
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1587
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarCastRecipe.
Definition: VPlan.h:1612
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1602
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1626
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
Definition: VPlan.h:1595
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1624
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3484
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3531
VPValue * getStepValue() const
Definition: VPlan.h:3528
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition: VPlan.h:3516
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:3494
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3504
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:3488
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Recipe to generate a scalar PHI.
Definition: VPlan.h:2269
VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, StringRef Name)
Definition: VPlan.h:2273
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2292
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPScalarPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPScalarPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2282
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:838
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:844
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:908
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:853
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:911
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:841
static bool classof(const VPUser *U)
Definition: VPlan.h:900
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:849
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:447
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:1174
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:206
operand_range operands()
Definition: VPlanValue.h:263
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:248
unsigned getNumOperands() const
Definition: VPlanValue.h:242
operand_iterator op_end()
Definition: VPlanValue.h:261
operand_iterator op_begin()
Definition: VPlanValue.h:259
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:243
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:237
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1416
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:89
unsigned getNumUsers() const
Definition: VPlanValue.h:117
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:178
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:173
friend class VPRecipeBase
Definition: VPlanValue.h:56
user_range users()
Definition: VPlanValue.h:138
A recipe to compute the pointers for widened memory accesses of IndexTy.
Definition: VPlan.h:1963
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1967
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1984
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1977
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHeaderPHIRecipe.
Definition: VPlan.h:1997
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1991
A recipe for widening Call instructions using library calls.
Definition: VPlan.h:1727
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1767
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1746
Function * getCalledScalarFunction() const
Definition: VPlan.h:1760
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1764
~VPWidenCallRecipe() override=default
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL={})
Definition: VPlan.h:1734
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3379
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVPHIRecipe.
Definition: VPlan.h:3399
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3386
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:3381
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1535
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1543
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1580
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1583
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1551
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1557
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1488
const VPValue * getEVL() const
Definition: VPlan.h:1512
~VPWidenEVLRecipe() override=default
VPWidenEVLRecipe(Instruction &I, iterator_range< IterT > Operands, VPValue &EVL)
Definition: VPlan.h:1493
VPWidenRecipe * clone() override final
Clone the current recipe.
Definition: VPlan.h:1504
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC)
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
Definition: VPlan.h:1497
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1511
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1519
A recipe for handling GEP instructions.
Definition: VPlan.h:1861
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition: VPlan.h:1894
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1883
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1878
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2095
static bool classof(const VPValue *V)
Definition: VPlan.h:2111
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2131
PHINode * getPHINode() const
Definition: VPlan.h:2126
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2099
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2123
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2129
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:2138
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2106
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2116
const VPValue * getStepValue() const
Definition: VPlan.h:2124
virtual void execute(VPTransformState &State) override=0
Generate the phi nodes.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2148
const TruncInst * getTruncInst() const
Definition: VPlan.h:2202
const VPValue * getVFValue() const
Definition: VPlan.h:2191
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition: VPlan.h:2161
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2172
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2201
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2152
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition: VPlan.h:2217
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2210
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition: VPlan.h:1635
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, std::initializer_list< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1676
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition: VPlan.h:1700
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition: VPlan.h:1709
StringRef getIntrinsicName() const
Return to name of the intrinsic as string.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1661
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition: VPlan.h:1715
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1684
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition: VPlan.h:1712
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1703
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1652
A common base class for widening memory operations.
Definition: VPlan.h:2955
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2966
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2963
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:3002
static bool classof(const VPUser *U)
Definition: VPlan.h:2996
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:3022
Instruction & Ingredient
Definition: VPlan.h:2957
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2985
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition: VPlan.h:3030
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2960
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2989
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2976
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3016
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:3012
void setMask(VPValue *Mask)
Definition: VPlan.h:2968
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:3009
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:3006
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:2308
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:2339
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr, DebugLoc DL={})
Create a new VPWidenPHIRecipe for Phi with start value Start and debug location DL.
Definition: VPlan.h:2315
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:2348
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2321
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:2345
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2238
~VPWidenPointerInductionRecipe() override=default
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:2229
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2255
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1437
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1453
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1448
unsigned getOpcode() const
Definition: VPlan.h:1477
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1442
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1459
static bool classof(const VPUser *U)
Definition: VPlan.h:1464
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:4144
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:4185
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1273
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:4251
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instruction can be combined.
Definition: VPlan.h:4341
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:4329
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:4338
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
Definition: VPlan.h:3817
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1144
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1120
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:923
bool hasScalableVF()
Definition: VPlan.h:4009
VPBasicBlock * getEntry()
Definition: VPlan.h:3930
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:4110
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3992
void setName(const Twine &newName)
Definition: VPlan.h:4037
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3998
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3995
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3971
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3985
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:4015
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition: VPlan.h:3892
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3931
unsigned getUF() const
Definition: VPlan.h:4023
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
Definition: VPlan.cpp:844
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition: VPlan.cpp:1245
bool hasVF(ElementCount VF)
Definition: VPlan.h:4008
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:4088
bool hasUF(unsigned UF) const
Definition: VPlan.h:4021
void setVF(ElementCount VF)
Definition: VPlan.h:4002
VPRegionBlock * createVPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Name and entry and exiting blocks set to nullptr.
Definition: VPlan.h:4122
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:310
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1051
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1045
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3949
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3978
VPBasicBlock * getMiddleBlock()
Definition: VPlan.h:3952
void setEntry(VPBasicBlock *VPBB)
Definition: VPlan.h:3900
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:4100
VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1251
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:4041
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1150
bool hasScalarVFOnly() const
Definition: VPlan.h:4019
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:3957
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:955
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4075
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1103
void addVF(ElementCount VF)
Definition: VPlan.h:4000
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:3962
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:4058
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:4084
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1067
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:3935
void setUF(unsigned UF)
Definition: VPlan.h:4028
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:292
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:145
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Other
Any other memory.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:93
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:98
iterator end()
Definition: VPlan.h:139
const ElementCount Start
Definition: VPlan.h:100
ElementCount End
Definition: VPlan.h:103
iterator begin()
Definition: VPlan.h:138
bool isEmpty() const
Definition: VPlan.h:105
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:109
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:677
LLVMContext & LLVMCtx
Definition: VPlan.h:681
LoopVectorizationCostModel & CM
Definition: VPlan.h:682
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, Type *CanIVTy, LoopVectorizationCostModel &CM, TargetTransformInfo::TargetCostKind CostKind)
Definition: VPlan.h:686
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1667
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
TargetTransformInfo::TargetCostKind CostKind
Definition: VPlan.h:684
VPTypeAnalysis Types
Definition: VPlan.h:680
const TargetLibraryInfo & TLI
Definition: VPlan.h:679
const TargetTransformInfo & TTI
Definition: VPlan.h:678
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:683
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2354
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2364
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:2355
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2360
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:939
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:333
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:339
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:347
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:335
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:343
CFGState(DominatorTree *DT)
Definition: VPlan.h:352
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:347
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:350
DenseMap< VPValue *, Value * > VPV2Vector
Definition: VPlan.h:250
DenseMap< VPValue *, SmallVector< Value *, 4 > > VPV2Scalars
Definition: VPlan.h:252
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:231
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:264
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:262
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:361
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:384
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:387
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:360
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:393
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:380
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:352
void reset(VPValue *Def, Value *V, const VPLane &Lane)
Reset an existing scalar value for Def and a given Lane.
Definition: VPlan.h:301
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:245
void set(VPValue *Def, Value *V, const VPLane &Lane)
Set the generated scalar V for Def and the given Lane.
Definition: VPlan.h:291
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:364
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:237
VPlan * Plan
Pointer to the VPlan code is generated for.
Definition: VPlan.h:370
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:367
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:285
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:240
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:371
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:373
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:274
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:3074
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3086
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3102
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3075
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:3035
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3036
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3062
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3044
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1824
bool isInvariantCond() const
Definition: VPlan.h:1855
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1831
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1826
VPValue * getCond() const
Definition: VPlan.h:1851
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:3154
VPValue * getStoredValue() const
Return the address accessed by this recipe.
Definition: VPlan.h:3165
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3184
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3155
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3168
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:3113
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3142
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3114
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3130
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3121
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:4194
const Value * V
Definition: VPlan.h:4192
void print(raw_ostream &O) const
Definition: VPlan.cpp:1389