LLVM 19.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/Twine.h"
36#include "llvm/ADT/ilist.h"
37#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
86const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
87 Loop *CurLoop = nullptr);
88
/// Reciprocal of the probability that a predicated block executes. A result
/// of X means the predicated block is assumed to run once for every X
/// iterations of the loop header.
///
/// TODO: Use the actual block probability when it is available. For now every
/// predicated block is assumed to execute 50% of the time.
inline unsigned getReciprocalPredBlockProb() {
  const unsigned ReciprocalProb = 2;
  return ReciprocalProb;
}
96
97/// A range of powers-of-2 vectorization factors with fixed start and
98/// adjustable end. The range includes start and excludes end, e.g.,:
99/// [1, 16) = {1, 2, 4, 8}
100struct VFRange {
101 // A power of 2.
103
104 // A power of 2. If End <= Start range is empty.
106
107 bool isEmpty() const {
109 }
110
112 : Start(Start), End(End) {
114 "Both Start and End should have the same scalable flag");
116 "Expected Start to be a power of 2");
118 "Expected End to be a power of 2");
119 }
120
121 /// Iterator to iterate over vectorization factors in a VFRange.
123 : public iterator_facade_base<iterator, std::forward_iterator_tag,
124 ElementCount> {
125 ElementCount VF;
126
127 public:
128 iterator(ElementCount VF) : VF(VF) {}
129
130 bool operator==(const iterator &Other) const { return VF == Other.VF; }
131
132 ElementCount operator*() const { return VF; }
133
135 VF *= 2;
136 return *this;
137 }
138 };
139
143 return iterator(End);
144 }
145};
146
147using VPlanPtr = std::unique_ptr<VPlan>;
148
149/// In what follows, the term "input IR" refers to code that is fed into the
150/// vectorizer whereas the term "output IR" refers to code that is generated by
151/// the vectorizer.
152
153/// VPLane provides a way to access lanes in both fixed width and scalable
154/// vectors, where for the latter the lane index sometimes needs calculating
155/// as a runtime expression.
156class VPLane {
157public:
158 /// Kind describes how to interpret Lane.
159 enum class Kind : uint8_t {
160 /// For First, Lane is the index into the first N elements of a
161 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
162 First,
163 /// For ScalableLast, Lane is the offset from the start of the last
164 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
165 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
166 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
168 };
169
170private:
171 /// in [0..VF)
172 unsigned Lane;
173
174 /// Indicates how the Lane should be interpreted, as described above.
175 Kind LaneKind;
176
177public:
178 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
179
181
182 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
183 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
184 "trying to extract with invalid offset");
185 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
186 Kind LaneKind;
187 if (VF.isScalable())
188 // In this case 'LaneOffset' refers to the offset from the start of the
189 // last subvector with VF.getKnownMinValue() elements.
191 else
192 LaneKind = VPLane::Kind::First;
193 return VPLane(LaneOffset, LaneKind);
194 }
195
197 return getLaneFromEnd(VF, 1);
198 }
199
200 /// Returns a compile-time known value for the lane index and asserts if the
201 /// lane can only be calculated at runtime.
202 unsigned getKnownLane() const {
203 assert(LaneKind == Kind::First);
204 return Lane;
205 }
206
207 /// Returns an expression describing the lane index that can be used at
208 /// runtime.
209 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
210
211 /// Returns the Kind of lane offset.
212 Kind getKind() const { return LaneKind; }
213
214 /// Returns true if this is the first lane of the whole vector.
215 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
216
217 /// Maps the lane to a cache index based on \p VF.
218 unsigned mapToCacheIndex(const ElementCount &VF) const {
219 switch (LaneKind) {
221 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
222 return VF.getKnownMinValue() + Lane;
223 default:
224 assert(Lane < VF.getKnownMinValue());
225 return Lane;
226 }
227 }
228
229 /// Returns the maxmimum number of lanes that we are able to consider
230 /// caching for \p VF.
231 static unsigned getNumCachedLanes(const ElementCount &VF) {
232 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
233 }
234};
235
236/// VPIteration represents a single point in the iteration space of the output
237/// (vectorized and/or unrolled) IR loop.
239 /// in [0..UF)
240 unsigned Part;
241
243
244 VPIteration(unsigned Part, unsigned Lane,
246 : Part(Part), Lane(Lane, Kind) {}
247
248 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
249
250 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
251};
252
253/// VPTransformState holds information passed down when "executing" a VPlan,
254/// needed for generating the output IR.
259
260 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
262 unsigned UF;
263
264 /// Hold the indices to generate specific scalar instructions. Null indicates
265 /// that all instances are to be generated, using either scalar or vector
266 /// instructions.
267 std::optional<VPIteration> Instance;
268
269 struct DataState {
270 /// A type for vectorized values in the new loop. Each value from the
271 /// original loop, when vectorized, is represented by UF vector values in
272 /// the new unrolled loop, where UF is the unroll factor.
274
276
280
281 /// Get the generated vector Value for a given VPValue \p Def and a given \p
282 /// Part if \p IsScalar is false, otherwise return the generated scalar
283 /// for \p Part. \see set.
284 Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
285
286 /// Get the generated Value for a given VPValue and given Part and Lane.
287 Value *get(VPValue *Def, const VPIteration &Instance);
288
289 bool hasVectorValue(VPValue *Def, unsigned Part) {
290 auto I = Data.PerPartOutput.find(Def);
291 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
292 I->second[Part];
293 }
294
296 auto I = Data.PerPartScalars.find(Def);
297 if (I == Data.PerPartScalars.end())
298 return false;
299 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
300 return Instance.Part < I->second.size() &&
301 CacheIdx < I->second[Instance.Part].size() &&
302 I->second[Instance.Part][CacheIdx];
303 }
304
305 /// Set the generated vector Value for a given VPValue and a given Part, if \p
306 /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
307 void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
308 if (IsScalar) {
309 set(Def, V, VPIteration(Part, 0));
310 return;
311 }
312 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
313 "scalar values must be stored as (Part, 0)");
314 if (!Data.PerPartOutput.count(Def)) {
316 Data.PerPartOutput[Def] = Entry;
317 }
318 Data.PerPartOutput[Def][Part] = V;
319 }
320
321 /// Reset an existing vector value for \p Def and a given \p Part.
322 void reset(VPValue *Def, Value *V, unsigned Part) {
323 auto Iter = Data.PerPartOutput.find(Def);
324 assert(Iter != Data.PerPartOutput.end() &&
325 "need to overwrite existing value");
326 Iter->second[Part] = V;
327 }
328
329 /// Set the generated scalar \p V for \p Def and the given \p Instance.
330 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
331 auto Iter = Data.PerPartScalars.insert({Def, {}});
332 auto &PerPartVec = Iter.first->second;
333 if (PerPartVec.size() <= Instance.Part)
334 PerPartVec.resize(Instance.Part + 1);
335 auto &Scalars = PerPartVec[Instance.Part];
336 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
337 if (Scalars.size() <= CacheIdx)
338 Scalars.resize(CacheIdx + 1);
339 assert(!Scalars[CacheIdx] && "should overwrite existing value");
340 Scalars[CacheIdx] = V;
341 }
342
343 /// Reset an existing scalar value for \p Def and a given \p Instance.
344 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
345 auto Iter = Data.PerPartScalars.find(Def);
346 assert(Iter != Data.PerPartScalars.end() &&
347 "need to overwrite existing value");
348 assert(Instance.Part < Iter->second.size() &&
349 "need to overwrite existing value");
350 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
351 assert(CacheIdx < Iter->second[Instance.Part].size() &&
352 "need to overwrite existing value");
353 Iter->second[Instance.Part][CacheIdx] = V;
354 }
355
356 /// Add additional metadata to \p To that was not present on \p Orig.
357 ///
358 /// Currently this is used to add the noalias annotations based on the
359 /// inserted memchecks. Use this for instructions that are *cloned* into the
360 /// vector loop.
361 void addNewMetadata(Instruction *To, const Instruction *Orig);
362
363 /// Add metadata from one instruction to another.
364 ///
365 /// This includes both the original MDs from \p From and additional ones (\see
366 /// addNewMetadata). Use this for *newly created* instructions in the vector
367 /// loop.
368 void addMetadata(Value *To, Instruction *From);
369
370 /// Set the debug location in the builder using the debug location \p DL.
372
373 /// Construct the vector value of a scalarized value \p V one lane at a time.
375
376 /// Hold state information used when constructing the CFG of the output IR,
377 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
378 struct CFGState {
379 /// The previous VPBasicBlock visited. Initially set to null.
381
382 /// The previous IR BasicBlock created or used. Initially set to the new
383 /// header BasicBlock.
384 BasicBlock *PrevBB = nullptr;
385
386 /// The last IR BasicBlock in the output IR. Set to the exit block of the
387 /// vector loop.
388 BasicBlock *ExitBB = nullptr;
389
390 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
391 /// of replication, maps the BasicBlock of the last replica created.
393
394 /// Updater for the DominatorTree.
396
398 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
399
400 /// Returns the BasicBlock* mapped to the pre-header of the loop region
401 /// containing \p R.
404
405 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
407
408 /// Hold a reference to the IRBuilder used to generate output IR code.
410
411 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
413
414 /// Pointer to the VPlan code is generated for.
416
417 /// The loop object for the current parent region, or nullptr.
419
420 /// LoopVersioning. It's only set up (non-null) if memchecks were
421 /// used.
422 ///
423 /// This is currently only used to add no-alias metadata based on the
424 /// memchecks. The actual versioning is performed manually.
426
427 /// Map SCEVs to their expanded values. Populated when executing
428 /// VPExpandSCEVRecipes.
430
431 /// VPlan-based type analysis.
433};
434
435/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
436/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
438 friend class VPBlockUtils;
439
440 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
441
442 /// An optional name for the block.
443 std::string Name;
444
445 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
446 /// it is a topmost VPBlockBase.
447 VPRegionBlock *Parent = nullptr;
448
449 /// List of predecessor blocks.
451
452 /// List of successor blocks.
454
455 /// VPlan containing the block. Can only be set on the entry block of the
456 /// plan.
457 VPlan *Plan = nullptr;
458
459 /// Add \p Successor as the last successor to this block.
460 void appendSuccessor(VPBlockBase *Successor) {
461 assert(Successor && "Cannot add nullptr successor!");
462 Successors.push_back(Successor);
463 }
464
465 /// Add \p Predecessor as the last predecessor to this block.
466 void appendPredecessor(VPBlockBase *Predecessor) {
467 assert(Predecessor && "Cannot add nullptr predecessor!");
468 Predecessors.push_back(Predecessor);
469 }
470
471 /// Remove \p Predecessor from the predecessors of this block.
472 void removePredecessor(VPBlockBase *Predecessor) {
473 auto Pos = find(Predecessors, Predecessor);
474 assert(Pos && "Predecessor does not exist");
475 Predecessors.erase(Pos);
476 }
477
478 /// Remove \p Successor from the successors of this block.
479 void removeSuccessor(VPBlockBase *Successor) {
480 auto Pos = find(Successors, Successor);
481 assert(Pos && "Successor does not exist");
482 Successors.erase(Pos);
483 }
484
485protected:
486 VPBlockBase(const unsigned char SC, const std::string &N)
487 : SubclassID(SC), Name(N) {}
488
489public:
490 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
491 /// that are actually instantiated. Values of this enumeration are kept in the
492 /// SubclassID field of the VPBlockBase objects. They are used for concrete
493 /// type identification.
494 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
495
497
498 virtual ~VPBlockBase() = default;
499
500 const std::string &getName() const { return Name; }
501
502 void setName(const Twine &newName) { Name = newName.str(); }
503
504 /// \return an ID for the concrete type of this object.
505 /// This is used to implement the classof checks. This should not be used
506 /// for any other purpose, as the values may change as LLVM evolves.
507 unsigned getVPBlockID() const { return SubclassID; }
508
509 VPRegionBlock *getParent() { return Parent; }
510 const VPRegionBlock *getParent() const { return Parent; }
511
512 /// \return A pointer to the plan containing the current block.
513 VPlan *getPlan();
514 const VPlan *getPlan() const;
515
516 /// Sets the pointer of the plan containing the block. The block must be the
517 /// entry block into the VPlan.
518 void setPlan(VPlan *ParentPlan);
519
520 void setParent(VPRegionBlock *P) { Parent = P; }
521
522 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
523 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
524 /// VPBlockBase is a VPBasicBlock, it is returned.
525 const VPBasicBlock *getEntryBasicBlock() const;
527
528 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
529 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
530 /// VPBlockBase is a VPBasicBlock, it is returned.
531 const VPBasicBlock *getExitingBasicBlock() const;
533
534 const VPBlocksTy &getSuccessors() const { return Successors; }
535 VPBlocksTy &getSuccessors() { return Successors; }
536
538
539 const VPBlocksTy &getPredecessors() const { return Predecessors; }
540 VPBlocksTy &getPredecessors() { return Predecessors; }
541
542 /// \return the successor of this VPBlockBase if it has a single successor.
543 /// Otherwise return a null pointer.
545 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
546 }
547
548 /// \return the predecessor of this VPBlockBase if it has a single
549 /// predecessor. Otherwise return a null pointer.
551 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
552 }
553
554 size_t getNumSuccessors() const { return Successors.size(); }
555 size_t getNumPredecessors() const { return Predecessors.size(); }
556
557 /// An Enclosing Block of a block B is any block containing B, including B
558 /// itself. \return the closest enclosing block starting from "this", which
559 /// has successors. \return the root enclosing block if all enclosing blocks
560 /// have no successors.
562
563 /// \return the closest enclosing block starting from "this", which has
564 /// predecessors. \return the root enclosing block if all enclosing blocks
565 /// have no predecessors.
567
568 /// \return the successors either attached directly to this VPBlockBase or, if
569 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
570 /// successors of its own, search recursively for the first enclosing
571 /// VPRegionBlock that has successors and return them. If no such
572 /// VPRegionBlock exists, return the (empty) successors of the topmost
573 /// VPBlockBase reached.
576 }
577
578 /// \return the hierarchical successor of this VPBlockBase if it has a single
579 /// hierarchical successor. Otherwise return a null pointer.
582 }
583
584 /// \return the predecessors either attached directly to this VPBlockBase or,
585 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
586 /// predecessors of its own, search recursively for the first enclosing
587 /// VPRegionBlock that has predecessors and return them. If no such
588 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
589 /// VPBlockBase reached.
592 }
593
594 /// \return the hierarchical predecessor of this VPBlockBase if it has a
595 /// single hierarchical predecessor. Otherwise return a null pointer.
598 }
599
600 /// Set a given VPBlockBase \p Successor as the single successor of this
601 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
602 /// This VPBlockBase must have no successors.
604 assert(Successors.empty() && "Setting one successor when others exist.");
605 assert(Successor->getParent() == getParent() &&
606 "connected blocks must have the same parent");
607 appendSuccessor(Successor);
608 }
609
610 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
611 /// successors of this VPBlockBase. This VPBlockBase is not added as
612 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
613 /// successors.
614 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
615 assert(Successors.empty() && "Setting two successors when others exist.");
616 appendSuccessor(IfTrue);
617 appendSuccessor(IfFalse);
618 }
619
620 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
621 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
622 /// as successor of any VPBasicBlock in \p NewPreds.
624 assert(Predecessors.empty() && "Block predecessors already set.");
625 for (auto *Pred : NewPreds)
626 appendPredecessor(Pred);
627 }
628
629 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
630 /// This VPBlockBase must have no successors. This VPBlockBase is not added
631 /// as predecessor of any VPBasicBlock in \p NewSuccs.
633 assert(Successors.empty() && "Block successors already set.");
634 for (auto *Succ : NewSuccs)
635 appendSuccessor(Succ);
636 }
637
638 /// Remove all the predecessors of this block.
639 void clearPredecessors() { Predecessors.clear(); }
640
641 /// Remove all the successors of this block.
642 void clearSuccessors() { Successors.clear(); }
643
644 /// The method which generates the output IR that corresponds to this
645 /// VPBlockBase, thereby "executing" the VPlan.
646 virtual void execute(VPTransformState *State) = 0;
647
648 /// Return the cost of the block.
650
651 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
652 static void deleteCFG(VPBlockBase *Entry);
653
654 /// Return true if it is legal to hoist instructions into this block.
656 // There are currently no constraints that prevent an instruction to be
657 // hoisted into a VPBlockBase.
658 return true;
659 }
660
661 /// Replace all operands of VPUsers in the block with \p NewValue and also
662 /// replaces all uses of VPValues defined in the block with NewValue.
663 virtual void dropAllReferences(VPValue *NewValue) = 0;
664
665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
666 void printAsOperand(raw_ostream &OS, bool PrintType) const {
667 OS << getName();
668 }
669
670 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
671 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
672 /// consecutive numbers.
673 ///
674 /// Note that the numbering is applied to the whole VPlan, so printing
675 /// individual blocks is consistent with the whole VPlan printing.
676 virtual void print(raw_ostream &O, const Twine &Indent,
677 VPSlotTracker &SlotTracker) const = 0;
678
679 /// Print plain-text dump of this VPlan to \p O.
680 void print(raw_ostream &O) const {
682 print(O, "", SlotTracker);
683 }
684
685 /// Print the successors of this block to \p O, prefixing all lines with \p
686 /// Indent.
687 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
688
689 /// Dump this VPBlockBase to dbgs().
690 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
691#endif
692
693 /// Clone the current block and its recipes without updating the operands of
694 /// the cloned recipes, including all blocks in the single-entry single-exit
695 /// region for VPRegionBlocks.
696 virtual VPBlockBase *clone() = 0;
697};
698
699/// A value that is used outside the VPlan. The operand of the user needs to be
700/// added to the associated phi node. The incoming block from VPlan is
701/// determined by where the VPValue is defined: if it is defined by a recipe
702/// outside a region, its parent block is used, otherwise the middle block is
703/// used.
704class VPLiveOut : public VPUser {
705 PHINode *Phi;
706
707public:
709 : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
710
711 static inline bool classof(const VPUser *U) {
712 return U->getVPUserID() == VPUser::VPUserID::LiveOut;
713 }
714
715 /// Fix the wrapped phi node. This means adding an incoming value to exit
716 /// block phi's from the vector loop via middle block (values from scalar loop
717 /// already reach these phi's), and updating the value to scalar header phi's
718 /// from the scalar preheader.
719 void fixPhi(VPlan &Plan, VPTransformState &State);
720
721 /// Returns true if the VPLiveOut uses scalars of operand \p Op.
722 bool usesScalars(const VPValue *Op) const override {
724 "Op must be an operand of the recipe");
725 return true;
726 }
727
728 PHINode *getPhi() const { return Phi; }
729
730#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
731 /// Print the VPLiveOut to \p O.
733#endif
734};
735
736/// Struct to hold various analysis needed for cost computations.
743
746 : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
747
748 /// Return the cost for \p UI with \p VF using the legacy cost model as
749 /// fallback until computing the cost of all recipes migrates to VPlan.
751
752 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
753 /// has already been pre-computed.
754 bool skipCostComputation(Instruction *UI, bool IsVector) const;
755};
756
757/// VPRecipeBase is a base class modeling a sequence of one or more output IR
758/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
759/// and is responsible for deleting its defined values. Single-value
760/// recipes must inherit from VPSingleDef instead of inheriting from both
761/// VPRecipeBase and VPValue separately.
762class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
763 public VPDef,
764 public VPUser {
765 friend VPBasicBlock;
766 friend class VPBlockUtils;
767
768 /// Each VPRecipe belongs to a single VPBasicBlock.
769 VPBasicBlock *Parent = nullptr;
770
771 /// The debug location for the recipe.
772 DebugLoc DL;
773
774public:
776 DebugLoc DL = {})
778
779 template <typename IterT>
781 DebugLoc DL = {})
783 virtual ~VPRecipeBase() = default;
784
785 /// Clone the current recipe.
786 virtual VPRecipeBase *clone() = 0;
787
788 /// \return the VPBasicBlock which this VPRecipe belongs to.
789 VPBasicBlock *getParent() { return Parent; }
790 const VPBasicBlock *getParent() const { return Parent; }
791
792 /// The method which generates the output IR instructions that correspond to
793 /// this VPRecipe, thereby "executing" the VPlan.
794 virtual void execute(VPTransformState &State) = 0;
795
796 /// Return the cost of this recipe, taking into account if the cost
797 /// computation should be skipped and the ForceTargetInstructionCost flag.
798 /// Also takes care of printing the cost for debugging.
800
801 /// Insert an unlinked recipe into a basic block immediately before
802 /// the specified recipe.
803 void insertBefore(VPRecipeBase *InsertPos);
804 /// Insert an unlinked recipe into \p BB immediately before the insertion
805 /// point \p IP;
807
808 /// Insert an unlinked Recipe into a basic block immediately after
809 /// the specified Recipe.
810 void insertAfter(VPRecipeBase *InsertPos);
811
812 /// Unlink this recipe from its current VPBasicBlock and insert it into
813 /// the VPBasicBlock that MovePos lives in, right after MovePos.
814 void moveAfter(VPRecipeBase *MovePos);
815
816 /// Unlink this recipe and insert into BB before I.
817 ///
818 /// \pre I is a valid iterator into BB.
820
821 /// This method unlinks 'this' from the containing basic block, but does not
822 /// delete it.
823 void removeFromParent();
824
825 /// This method unlinks 'this' from the containing basic block and deletes it.
826 ///
827 /// \returns an iterator pointing to the element after the erased one
829
830 /// Method to support type inquiry through isa, cast, and dyn_cast.
831 static inline bool classof(const VPDef *D) {
832 // All VPDefs are also VPRecipeBases.
833 return true;
834 }
835
836 static inline bool classof(const VPUser *U) {
837 return U->getVPUserID() == VPUser::VPUserID::Recipe;
838 }
839
840 /// Returns true if the recipe may have side-effects.
841 bool mayHaveSideEffects() const;
842
843 /// Returns true for PHI-like recipes.
844 bool isPhi() const {
845 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
846 }
847
848 /// Returns true if the recipe may read from memory.
849 bool mayReadFromMemory() const;
850
851 /// Returns true if the recipe may write to memory.
852 bool mayWriteToMemory() const;
853
854 /// Returns true if the recipe may read from or write to memory.
855 bool mayReadOrWriteMemory() const {
857 }
858
859 /// Returns the debug location of the recipe.
860 DebugLoc getDebugLoc() const { return DL; }
861
862protected:
863 /// Compute the cost of this recipe using the legacy cost model and the
864 /// underlying instructions.
866};
867
// Helper macro that defines the classof overloads shared by recipes. Each
// overload reports whether its argument is, or is defined by, a recipe whose
// VPDef ID equals VPDefID. A VPValue without a defining recipe and a VPUser
// that is not a VPRecipeBase both yield false.
#define VP_CLASSOF_IMPL(VPDefID) \
  static inline bool classof(const VPRecipeBase *Recipe) { \
    return Recipe->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPSingleDefRecipe *Recipe) { \
    return Recipe->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPDef *Def) { \
    return Def->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPValue *Val) { \
    if (auto *DefRecipe = Val->getDefiningRecipe()) \
      return DefRecipe->getVPDefID() == VPDefID; \
    return false; \
  } \
  static inline bool classof(const VPUser *User) { \
    if (auto *UserRecipe = dyn_cast<VPRecipeBase>(User)) \
      return UserRecipe->getVPDefID() == VPDefID; \
    return false; \
  }
887
888/// VPSingleDef is a base class for recipes for modeling a sequence of one or
889/// more output IR that define a single result VPValue.
890/// Note that VPRecipeBase must be inherited from before VPValue.
891class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
892public:
893 template <typename IterT>
894 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
895 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
896
897 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
898 DebugLoc DL = {})
899 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
900
901 template <typename IterT>
902 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
903 DebugLoc DL = {})
904 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
905
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true when \p R's VPDef ID is one of the recipe kinds in the first case
/// group below and false for the kinds in the second group. The switch has no
/// default label, so an ID that appears in neither group falls through to
/// llvm_unreachable.
906 static inline bool classof(const VPRecipeBase *R) {
907 switch (R->getVPDefID()) {
908 case VPRecipeBase::VPDerivedIVSC:
909 case VPRecipeBase::VPEVLBasedIVPHISC:
910 case VPRecipeBase::VPExpandSCEVSC:
911 case VPRecipeBase::VPInstructionSC:
912 case VPRecipeBase::VPReductionEVLSC:
913 case VPRecipeBase::VPReductionSC:
914 case VPRecipeBase::VPReplicateSC:
915 case VPRecipeBase::VPScalarIVStepsSC:
916 case VPRecipeBase::VPVectorPointerSC:
917 case VPRecipeBase::VPWidenCallSC:
918 case VPRecipeBase::VPWidenCanonicalIVSC:
919 case VPRecipeBase::VPWidenCastSC:
920 case VPRecipeBase::VPWidenGEPSC:
921 case VPRecipeBase::VPWidenSC:
922 case VPRecipeBase::VPWidenSelectSC:
923 case VPRecipeBase::VPBlendSC:
924 case VPRecipeBase::VPPredInstPHISC:
925 case VPRecipeBase::VPCanonicalIVPHISC:
926 case VPRecipeBase::VPActiveLaneMaskPHISC:
927 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
928 case VPRecipeBase::VPWidenPHISC:
929 case VPRecipeBase::VPWidenIntOrFpInductionSC:
930 case VPRecipeBase::VPWidenPointerInductionSC:
931 case VPRecipeBase::VPReductionPHISC:
932 case VPRecipeBase::VPScalarCastSC:
933 return true;
// The kinds below are reported as not being VPSingleDefRecipes.
934 case VPRecipeBase::VPInterleaveSC:
935 case VPRecipeBase::VPBranchOnMaskSC:
936 case VPRecipeBase::VPWidenLoadEVLSC:
937 case VPRecipeBase::VPWidenLoadSC:
938 case VPRecipeBase::VPWidenStoreEVLSC:
939 case VPRecipeBase::VPWidenStoreSC:
940 // TODO: Widened stores don't define a value, but widened loads do. Split
941 // the recipes to be able to make widened loads VPSingleDefRecipes.
942 return false;
943 }
944 llvm_unreachable("Unhandled VPDefID");
945 }
946
/// Type inquiry starting from a VPUser: returns false if \p U is not a
/// VPRecipeBase (dyn_cast yields null), otherwise defers to the
/// VPRecipeBase overload of classof.
947 static inline bool classof(const VPUser *U) {
948 auto *R = dyn_cast<VPRecipeBase>(U);
949 return R && classof(R);
950 }
951
/// Overrides the inherited clone() and keeps it pure virtual, with
/// VPSingleDefRecipe * as the declared return type.
952 virtual VPSingleDefRecipe *clone() override = 0;
953
954 /// Returns the underlying instruction.
956 return cast<Instruction>(getUnderlyingValue());
957 }
959 return cast<Instruction>(getUnderlyingValue());
960 }
961};
962
963/// Class to record the LLVM IR flags of a recipe along with the recipe itself.
/// Discriminator recording which kind of IR flags (if any) the recipe
/// carries. The constructors set it, the flag accessors assert on it, and
/// setFlags() switches on it to decide which flag field to read.
965 enum class OperationType : unsigned char {
966 Cmp, // A compare predicate is stored.
967 OverflowingBinOp, // WrapFlags (NUW / NSW) are stored.
968 DisjointOp, // DisjointFlags.IsDisjoint is stored.
969 PossiblyExactOp, // ExactFlags.IsExact is stored.
970 GEPOp, // GEPFlags.IsInBounds is stored.
971 FPMathOp, // FMFs (fast-math flags) are stored.
972 NonNegOp, // NonNegFlags.NonNeg is stored.
973 Other // No flags; constructors zero AllFlags in this case.
974 };
975
976public:
977 struct WrapFlagsTy {
978 char HasNUW : 1;
979 char HasNSW : 1;
980
982 };
983
985 char IsDisjoint : 1;
987 };
988
989protected:
990 struct GEPFlagsTy {
991 char IsInBounds : 1;
993 };
994
995private:
/// One-bit storage for the 'exact' flag; used when the operation type is
/// PossiblyExactOp.
996 struct ExactFlagsTy {
997 char IsExact : 1;
998 };
/// One-bit storage for the 'nneg' flag; used when the operation type is
/// NonNegOp.
999 struct NonNegFlagsTy {
1000 char NonNeg : 1;
1001 };
/// Storage for fast-math flags, one bit per flag; used when the operation
/// type is FPMathOp. The converting constructor is only declared here
/// (presumably it copies each flag out of \p FMF -- see its definition).
1002 struct FastMathFlagsTy {
1003 char AllowReassoc : 1;
1004 char NoNaNs : 1;
1005 char NoInfs : 1;
1006 char NoSignedZeros : 1;
1007 char AllowReciprocal : 1;
1008 char AllowContract : 1;
1009 char ApproxFunc : 1;
1010
1011 FastMathFlagsTy(const FastMathFlags &FMF);
1012 };
1013
1014 OperationType OpType;
1015
1016 union {
1020 ExactFlagsTy ExactFlags;
1022 NonNegFlagsTy NonNegFlags;
1023 FastMathFlagsTy FMFs;
1024 unsigned AllFlags;
1025 };
1026
1027protected:
1029 OpType = Other.OpType;
1030 AllFlags = Other.AllFlags;
1031 }
1032
1033public:
/// Creates a recipe that carries no IR flags: the operation type is set to
/// Other and every flag bit is cleared through AllFlags.
1034 template <typename IterT>
1035 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
1036 : VPSingleDefRecipe(SC, Operands, DL) {
1037 OpType = OperationType::Other;
1038 AllFlags = 0;
1039 }
1040
1041 template <typename IterT>
1042 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
1044 if (auto *Op = dyn_cast<CmpInst>(&I)) {
1045 OpType = OperationType::Cmp;
1046 CmpPredicate = Op->getPredicate();
1047 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1048 OpType = OperationType::DisjointOp;
1049 DisjointFlags.IsDisjoint = Op->isDisjoint();
1050 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1051 OpType = OperationType::OverflowingBinOp;
1052 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1053 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1054 OpType = OperationType::PossiblyExactOp;
1055 ExactFlags.IsExact = Op->isExact();
1056 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1057 OpType = OperationType::GEPOp;
1058 GEPFlags.IsInBounds = GEP->isInBounds();
1059 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1060 OpType = OperationType::NonNegOp;
1061 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1062 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1063 OpType = OperationType::FPMathOp;
1064 FMFs = Op->getFastMathFlags();
1065 } else {
1066 OpType = OperationType::Other;
1067 AllFlags = 0;
1068 }
1069 }
1070
/// Creates a compare recipe: the operation type is Cmp and \p Pred is stored
/// as the compare predicate.
1071 template <typename IterT>
1072 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1073 CmpInst::Predicate Pred, DebugLoc DL = {})
1074 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1075 CmpPredicate(Pred) {}
1076
1077 template <typename IterT>
1078 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1080 : VPSingleDefRecipe(SC, Operands, DL),
1081 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1082
/// Creates a floating-point math recipe: the operation type is FPMathOp and
/// the stored fast-math flags are initialised from \p FMFs (via the
/// FastMathFlagsTy converting constructor).
1083 template <typename IterT>
1084 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1085 FastMathFlags FMFs, DebugLoc DL = {})
1086 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1087 FMFs(FMFs) {}
1088
1089 template <typename IterT>
1090 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1092 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1094
1095protected:
/// Creates a GEP-like recipe: the operation type is GEPOp and \p GEPFlags is
/// stored as the recipe's GEP flags. Declared in a protected section, like
/// GEPFlagsTy itself.
1096 template <typename IterT>
1097 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1098 GEPFlagsTy GEPFlags, DebugLoc DL = {})
1099 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1100 GEPFlags(GEPFlags) {}
1101
1102public:
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true if \p R's VPDef ID is one of the six kinds compared against here:
/// VPInstruction, VPWiden, VPWidenGEP, VPWidenCast, VPReplicate or
/// VPVectorPointer.
1103 static inline bool classof(const VPRecipeBase *R) {
1104 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1105 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1106 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1107 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1108 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1109 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1110 }
1111
/// Type inquiry starting from a VPUser: returns false if \p U is not a
/// VPRecipeBase (dyn_cast yields null), otherwise defers to the
/// VPRecipeBase overload of classof.
1112 static inline bool classof(const VPUser *U) {
1113 auto *R = dyn_cast<VPRecipeBase>(U);
1114 return R && classof(R);
1115 }
1116
1117 /// Drop all poison-generating flags.
1119 // NOTE: This needs to be kept in-sync with
1120 // Instruction::dropPoisonGeneratingFlags.
1121 switch (OpType) {
1122 case OperationType::OverflowingBinOp:
1123 WrapFlags.HasNUW = false;
1124 WrapFlags.HasNSW = false;
1125 break;
1126 case OperationType::DisjointOp:
1127 DisjointFlags.IsDisjoint = false;
1128 break;
1129 case OperationType::PossiblyExactOp:
1130 ExactFlags.IsExact = false;
1131 break;
1132 case OperationType::GEPOp:
1133 GEPFlags.IsInBounds = false;
1134 break;
1135 case OperationType::FPMathOp:
1136 FMFs.NoNaNs = false;
1137 FMFs.NoInfs = false;
1138 break;
1139 case OperationType::NonNegOp:
1140 NonNegFlags.NonNeg = false;
1141 break;
1142 case OperationType::Cmp:
1143 case OperationType::Other:
1144 break;
1145 }
1146 }
1147
1148 /// Set the IR flags for \p I.
1149 void setFlags(Instruction *I) const {
1150 switch (OpType) {
1151 case OperationType::OverflowingBinOp:
1152 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1153 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1154 break;
1155 case OperationType::DisjointOp:
1156 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1157 break;
1158 case OperationType::PossiblyExactOp:
1159 I->setIsExact(ExactFlags.IsExact);
1160 break;
1161 case OperationType::GEPOp:
1162 // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
1163 cast<GetElementPtrInst>(I)->setNoWrapFlags(
1166 break;
1167 case OperationType::FPMathOp:
1168 I->setHasAllowReassoc(FMFs.AllowReassoc);
1169 I->setHasNoNaNs(FMFs.NoNaNs);
1170 I->setHasNoInfs(FMFs.NoInfs);
1171 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1172 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1173 I->setHasAllowContract(FMFs.AllowContract);
1174 I->setHasApproxFunc(FMFs.ApproxFunc);
1175 break;
1176 case OperationType::NonNegOp:
1177 I->setNonNeg(NonNegFlags.NonNeg);
1178 break;
1179 case OperationType::Cmp:
1180 case OperationType::Other:
1181 break;
1182 }
1183 }
1184
1186 assert(OpType == OperationType::Cmp &&
1187 "recipe doesn't have a compare predicate");
1188 return CmpPredicate;
1189 }
1190
/// Returns the stored inbounds flag. Only valid when the operation type is
/// GEPOp; any other operation type trips the assertion.
1191 bool isInBounds() const {
1192 assert(OpType == OperationType::GEPOp &&
1193 "recipe doesn't have inbounds flag");
1194 return GEPFlags.IsInBounds;
1195 }
1196
1197 /// Returns true if the recipe has fast-math flags.
1198 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1199
1201
/// Returns the stored no-unsigned-wrap (NUW) flag. Only valid when the
/// operation type is OverflowingBinOp; any other type trips the assertion.
1202 bool hasNoUnsignedWrap() const {
1203 assert(OpType == OperationType::OverflowingBinOp &&
1204 "recipe doesn't have a NUW flag");
1205 return WrapFlags.HasNUW;
1206 }
1207
/// Returns the stored no-signed-wrap (NSW) flag. Only valid when the
/// operation type is OverflowingBinOp; any other type trips the assertion.
1208 bool hasNoSignedWrap() const {
1209 assert(OpType == OperationType::OverflowingBinOp &&
1210 "recipe doesn't have a NSW flag");
1211 return WrapFlags.HasNSW;
1212 }
1213
1214 bool isDisjoint() const {
1215 assert(OpType == OperationType::DisjointOp &&
1216 "recipe cannot have a disjoing flag");
1218 }
1219
1220#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1221 void printFlags(raw_ostream &O) const;
1222#endif
1223};
1224
1225/// This is a concrete Recipe that models a single VPlan-level instruction.
1226/// While, like any Recipe, it may generate a sequence of IR instructions when
1227/// executed, these instructions always form a single-def expression, as
1228/// the VPInstruction is also a single def-use vertex.
1230 friend class VPlanSlp;
1231
1232public:
1233 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1234 enum {
1236 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1237 // values of a first-order recurrence.
1243 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1244 /// The first operand is the incoming value from the predecessor in VPlan,
1245 /// the second operand is the incoming value for all other predecessors
1246 /// (which are currently not modeled in VPlan).
1249 // Increment the canonical IV separately for each unrolled part.
1254 // Takes the VPValue to extract from as first operand and the lane or part
1255 // to extract as second operand, counting from the end starting with 1 for
1256 // last. The second operand must be a positive constant and <= VF when
1257 // extracting from a vector or <= UF when extracting from an unrolled
1258 // scalar.
1260 LogicalAnd, // Non-poison propagating logical And.
1261 // Add an offset in bytes (second operand) to a base pointer (first
1262 // operand). Only generates scalar values (either for the first lane only or
1263 // for all lanes, depending on its uses).
1265 };
1266
1267private:
1268 typedef unsigned char OpcodeTy;
1269 OpcodeTy Opcode;
1270
1271 /// An optional name that can be used for the generated IR instruction.
1272 const std::string Name;
1273
1274 /// Returns true if this VPInstruction generates scalar values for all lanes.
1275 /// Most VPInstructions generate a single value per part, either vector or
1276 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1277 /// values per all lanes, stemming from an original ingredient. This method
1278 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1279 /// underlying ingredient.
1280 bool doesGeneratePerAllLanes() const;
1281
1282 /// Returns true if we can generate a scalar for the first lane only if
1283 /// needed.
1284 bool canGenerateScalarForFirstLane() const;
1285
1286 /// Utility methods serving execute(): generates a single instance of the
1287 /// modeled instruction for a given part. \returns the generated value for \p
1288 /// Part. In some cases an existing value is returned rather than a generated
1289 /// one.
1290 Value *generatePerPart(VPTransformState &State, unsigned Part);
1291
1292 /// Utility methods serving execute(): generates a scalar single instance of
1293 /// the modeled instruction for a given lane. \returns the scalar generated
1294 /// value for lane \p Lane.
1295 Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
1296
1297#if !defined(NDEBUG)
1298 /// Return true if the VPInstruction is a floating point math operation, i.e.
1299 /// has fast-math flags.
1300 bool isFPMathOp() const;
1301#endif
1302
1303public:
1305 const Twine &Name = "")
1306 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1307 Opcode(Opcode), Name(Name.str()) {}
1308
1309 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1310 DebugLoc DL = {}, const Twine &Name = "")
1312
1313 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1314 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1315
/// Creates a VPInstruction with opcode \p Opcode over \p Operands that
/// carries the no-wrap flags \p WrapFlags. \p Name is copied into the
/// recipe's std::string member.
/// NOTE(review): \p Opcode is an unsigned but is stored in the unsigned char
/// member OpcodeTy, so it is assumed to fit in 8 bits -- confirm for new
/// opcodes.
1316 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1317 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1318 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1319 Opcode(Opcode), Name(Name.str()) {}
1320
/// Creates a VPInstruction with opcode \p Opcode over \p Operands that
/// carries the disjoint flag \p DisjointFlag. Asserts that \p Opcode is
/// Instruction::Or. \p Name is copied into the recipe's std::string member.
1321 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1322 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1323 const Twine &Name = "")
1324 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1325 Opcode(Opcode), Name(Name.str()) {
1326 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1327 }
1328
1329 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1330 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1331
1332 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1333
1334 VPInstruction *clone() override {
1336 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1337 New->transferFlags(*this);
1338 return New;
1339 }
1340
1341 unsigned getOpcode() const { return Opcode; }
1342
1343 /// Generate the instruction.
1344 /// TODO: We currently execute only per-part unless a specific instance is
1345 /// provided.
1346 void execute(VPTransformState &State) override;
1347
1348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1349 /// Print the VPInstruction to \p O.
1350 void print(raw_ostream &O, const Twine &Indent,
1351 VPSlotTracker &SlotTracker) const override;
1352
1353 /// Print the VPInstruction to dbgs() (for debugging).
1354 LLVM_DUMP_METHOD void dump() const;
1355#endif
1356
1357 /// Return true if this instruction may modify memory. The answer is derived
/// from the opcode alone: Store, Call, Invoke and SLPStore are reported as
/// possibly writing, every other opcode as not writing.
1358 bool mayWriteToMemory() const {
1359 // TODO: we can use attributes of the called function to rule out memory
1360 // modifications.
1361 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
1362 Opcode == Instruction::Invoke || Opcode == SLPStore;
1363 }
1364
1365 bool hasResult() const {
1366 // CallInst may or may not have a result, depending on the called function.
1367 // Conservatively return calls have results for now.
1368 switch (getOpcode()) {
1369 case Instruction::Ret:
1370 case Instruction::Br:
1371 case Instruction::Store:
1372 case Instruction::Switch:
1373 case Instruction::IndirectBr:
1374 case Instruction::Resume:
1375 case Instruction::CatchRet:
1376 case Instruction::Unreachable:
1377 case Instruction::Fence:
1378 case Instruction::AtomicRMW:
1381 return false;
1382 default:
1383 return true;
1384 }
1385 }
1386
1387 /// Returns true if the recipe only uses the first lane of operand \p Op.
1388 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1389
1390 /// Returns true if the recipe only uses the first part of operand \p Op.
1391 bool onlyFirstPartUsed(const VPValue *Op) const override;
1392
1393 /// Returns true if this VPInstruction produces a scalar value from a vector,
1394 /// e.g. by performing a reduction or extracting a lane.
1395 bool isVectorToScalar() const;
1396
1397 /// Returns true if this VPInstruction's operands are single scalars and the
1398 /// result is also a single scalar.
1399 bool isSingleScalar() const;
1400};
1401
1402/// VPWidenRecipe is a recipe for producing a vector-typed copy of its
1403/// ingredient. This recipe covers most of the traditional vectorization cases
1404/// where each ingredient transforms into a vectorized version of itself.
1406 unsigned Opcode;
1407
1408public:
1409 template <typename IterT>
1411 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1412 Opcode(I.getOpcode()) {}
1413
1414 ~VPWidenRecipe() override = default;
1415
/// Returns a new heap-allocated VPWidenRecipe built from the same underlying
/// instruction and this recipe's current operands. The flags are then copied
/// from this recipe with transferFlags(), replacing whatever the constructor
/// derived from the underlying instruction.
1416 VPWidenRecipe *clone() override {
1417 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1418 R->transferFlags(*this);
1419 return R;
1420 }
1421
1422 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1423
1424 /// Produce widened copies of all Ingredients.
1425 void execute(VPTransformState &State) override;
1426
1427 unsigned getOpcode() const { return Opcode; }
1428
1429#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1430 /// Print the recipe.
1431 void print(raw_ostream &O, const Twine &Indent,
1432 VPSlotTracker &SlotTracker) const override;
1433#endif
1434};
1435
1436/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1438 /// Cast instruction opcode.
1439 Instruction::CastOps Opcode;
1440
1441 /// Result type for the cast.
1442 Type *ResultTy;
1443
1444public:
1446 CastInst &UI)
1447 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1448 ResultTy(ResultTy) {
1449 assert(UI.getOpcode() == Opcode &&
1450 "opcode of underlying cast doesn't match");
1451 }
1452
1454 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1455 ResultTy(ResultTy) {}
1456
1457 ~VPWidenCastRecipe() override = default;
1458
1460 if (auto *UV = getUnderlyingValue())
1461 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1462 *cast<CastInst>(UV));
1463
1464 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1465 }
1466
1467 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1468
1469 /// Produce widened copies of the cast.
1470 void execute(VPTransformState &State) override;
1471
1472#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1473 /// Print the recipe.
1474 void print(raw_ostream &O, const Twine &Indent,
1475 VPSlotTracker &SlotTracker) const override;
1476#endif
1477
1478 Instruction::CastOps getOpcode() const { return Opcode; }
1479
1480 /// Returns the result type of the cast.
1481 Type *getResultType() const { return ResultTy; }
1482};
1483
1484/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1486 Instruction::CastOps Opcode;
1487
1488 Type *ResultTy;
1489
1490 Value *generate(VPTransformState &State, unsigned Part);
1491
1492public:
1494 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1495 ResultTy(ResultTy) {}
1496
1497 ~VPScalarCastRecipe() override = default;
1498
1500 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1501 }
1502
1503 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1504
1505 void execute(VPTransformState &State) override;
1506
1507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1508 void print(raw_ostream &O, const Twine &Indent,
1509 VPSlotTracker &SlotTracker) const override;
1510#endif
1511
1512 /// Returns the result type of the cast.
1513 Type *getResultType() const { return ResultTy; }
1514
1515 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1516 // At the moment, only uniform codegen is implemented.
1518 "Op must be an operand of the recipe");
1519 return true;
1520 }
1521};
1522
1523/// A recipe for widening Call instructions.
1525 /// ID of the vector intrinsic to call when widening the call. If set to
1526 /// Intrinsic::not_intrinsic, a library call will be used instead.
1527 Intrinsic::ID VectorIntrinsicID;
1528 /// If this recipe represents a library call, Variant stores a pointer to
1529 /// the chosen function. There is a 1:1 mapping between a given VF and the
1530 /// chosen vectorized variant, so there will be a different vplan for each
1531 /// VF with a valid variant.
1532 Function *Variant;
1533
1534public:
1535 template <typename IterT>
1537 Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1538 Function *Variant = nullptr)
1539 : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
1540 VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
1541 assert(
1542 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1543 "last operand must be the called function");
1544 }
1545
1546 ~VPWidenCallRecipe() override = default;
1547
1550 VectorIntrinsicID, getDebugLoc(), Variant);
1551 }
1552
1553 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1554
1555 /// Produce a widened version of the call instruction.
1556 void execute(VPTransformState &State) override;
1557
1559 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1560 }
1561
1563 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1564 }
1566 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1567 }
1568
1569#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1570 /// Print the recipe.
1571 void print(raw_ostream &O, const Twine &Indent,
1572 VPSlotTracker &SlotTracker) const override;
1573#endif
1574};
1575
1576/// A recipe for widening select instructions.
1578 template <typename IterT>
1580 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1581 I.getDebugLoc()) {}
1582
1583 ~VPWidenSelectRecipe() override = default;
1584
1586 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1587 operands());
1588 }
1589
1590 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1591
1592 /// Produce a widened version of the select instruction.
1593 void execute(VPTransformState &State) override;
1594
1595#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1596 /// Print the recipe.
1597 void print(raw_ostream &O, const Twine &Indent,
1598 VPSlotTracker &SlotTracker) const override;
1599#endif
1600
1601 VPValue *getCond() const {
1602 return getOperand(0);
1603 }
1604
1605 bool isInvariantCond() const {
1607 }
1608};
1609
1610/// A recipe for handling GEP instructions.
1612 bool isPointerLoopInvariant() const {
1614 }
1615
1616 bool isIndexLoopInvariant(unsigned I) const {
1618 }
1619
/// Returns true if every operand of this recipe reports
/// isDefinedOutsideVectorRegions().
1620 bool areAllOperandsInvariant() const {
1621 return all_of(operands(), [](VPValue *Op) {
1622 return Op->isDefinedOutsideVectorRegions();
1623 });
1624 }
1625
1626public:
1627 template <typename IterT>
1629 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1630
1631 ~VPWidenGEPRecipe() override = default;
1632
1634 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1635 operands());
1636 }
1637
1638 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1639
1640 /// Generate the gep nodes.
1641 void execute(VPTransformState &State) override;
1642
1643#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1644 /// Print the recipe.
1645 void print(raw_ostream &O, const Twine &Indent,
1646 VPSlotTracker &SlotTracker) const override;
1647#endif
1648};
1649
1650/// A recipe to compute the pointers for widened memory accesses of IndexedTy for
1651/// all parts. If IsReverse is true, compute pointers for accessing the input in
1652/// reverse order per part.
1654 Type *IndexedTy;
1655 bool IsReverse;
1656
1657public:
/// Creates a vector-pointer recipe with \p Ptr as its single operand.
/// \p IsInBounds is wrapped in a GEPFlagsTy and stored as the recipe's GEP
/// flags; \p IndexedTy and \p IsReverse are stored in the members of the same
/// name.
1658 VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1659 bool IsInBounds, DebugLoc DL)
1660 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1661 GEPFlagsTy(IsInBounds), DL),
1662 IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1663
1664 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1665
1666 void execute(VPTransformState &State) override;
1667
1668 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1670 "Op must be an operand of the recipe");
1671 return true;
1672 }
1673
1675 return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1676 isInBounds(), getDebugLoc());
1677 }
1678
1679#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1680 /// Print the recipe.
1681 void print(raw_ostream &O, const Twine &Indent,
1682 VPSlotTracker &SlotTracker) const override;
1683#endif
1684};
1685
1686/// A pure virtual base class for all recipes modeling header phis, including
1687/// phis for first order recurrences, pointer inductions and reductions. The
1688/// start value is the first operand of the recipe and the incoming value from
1689/// the backedge is the second operand.
1690///
1691/// Inductions are modeled using the following sub-classes:
1692/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1693/// starting at a specified value (zero for the main vector loop, the resume
1694/// value for the epilogue vector loop) and stepping by 1. The induction
1695/// controls exiting of the vector loop by comparing against the vector trip
1696/// count. Produces a single scalar PHI for the induction value per
1697/// iteration.
1698/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1699/// floating point inductions with arbitrary start and step values. Produces
1700/// a vector PHI per-part.
1701/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1702/// value of an IV with different start and step values. Produces a single
1703/// scalar value per iteration.
1704/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1705/// canonical or derived induction.
1706/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1707/// pointer induction. Produces either a vector PHI per-part or scalar values
1708/// per-lane based on the canonical induction.
1710protected:
/// Creates the recipe with an empty operand list and \p UnderlyingInstr as the
/// value passed to the VPSingleDefRecipe base. If \p Start is non-null it is
/// then added as the first operand; with a null \p Start the recipe starts out
/// with no operands.
1711 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1712 VPValue *Start = nullptr, DebugLoc DL = {})
1713 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
1714 if (Start)
1715 addOperand(Start);
1716 }
1717
1718public:
1719 ~VPHeaderPHIRecipe() override = default;
1720
1721 /// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true if \p B's VPDef ID lies in the inclusive range
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
1722 static inline bool classof(const VPRecipeBase *B) {
1723 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1724 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1725 }
/// Type inquiry starting from a VPValue: returns false if \p V has no
/// defining recipe, otherwise applies the same inclusive ID range check
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC] to that recipe.
1726 static inline bool classof(const VPValue *V) {
1727 auto *B = V->getDefiningRecipe();
1728 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1729 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1730 }
1731
1732 /// Generate the phi nodes.
1733 void execute(VPTransformState &State) override = 0;
1734
1735#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1736 /// Print the recipe.
1737 void print(raw_ostream &O, const Twine &Indent,
1738 VPSlotTracker &SlotTracker) const override = 0;
1739#endif
1740
1741 /// Returns the start value of the phi, if one is set.
1743 return getNumOperands() == 0 ? nullptr : getOperand(0);
1744 }
1746 return getNumOperands() == 0 ? nullptr : getOperand(0);
1747 }
1748
1749 /// Update the start value of the recipe.
1751
1752 /// Returns the incoming value from the loop backedge.
1754 return getOperand(1);
1755 }
1756
1757 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1758 /// to be a recipe.
1761 }
1762};
1763
1764/// A recipe for handling phi nodes of integer and floating-point inductions,
1765/// producing their vector values.
1767 PHINode *IV;
1768 TruncInst *Trunc;
1769 const InductionDescriptor &IndDesc;
1770
1771public:
1773 const InductionDescriptor &IndDesc)
1774 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
1775 Trunc(nullptr), IndDesc(IndDesc) {
1776 addOperand(Step);
1777 }
1778
1780 const InductionDescriptor &IndDesc,
1781 TruncInst *Trunc)
1782 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
1783 IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
1784 addOperand(Step);
1785 }
1786
1788
1791 getStepValue(), IndDesc, Trunc);
1792 }
1793
1794 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1795
1796 /// Generate the vectorized and scalarized versions of the phi node as
1797 /// needed by their users.
1798 void execute(VPTransformState &State) override;
1799
1800#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1801 /// Print the recipe.
1802 void print(raw_ostream &O, const Twine &Indent,
1803 VPSlotTracker &SlotTracker) const override;
1804#endif
1805
1807 // TODO: All operands of base recipe must exist and be at same index in
1808 // derived recipe.
1810 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1811 }
1812
1814 // TODO: All operands of base recipe must exist and be at same index in
1815 // derived recipe.
1817 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1818 }
1819
1820 /// Returns the step value of the induction.
1822 const VPValue *getStepValue() const { return getOperand(1); }
1823
1824 /// Returns the TruncInst this recipe was constructed with, or nullptr if it
1825 /// was constructed without one (that constructor sets Trunc to nullptr).
1826 TruncInst *getTruncInst() { return Trunc; }
1827 const TruncInst *getTruncInst() const { return Trunc; }
1828
1829 PHINode *getPHINode() { return IV; }
1830
1831 /// Returns the induction descriptor for the recipe.
1832 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1833
1834 /// Returns true if the induction is canonical, i.e. starting at 0 and
1835 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
1836 /// same type as the canonical induction.
1837 bool isCanonical() const;
1838
1839 /// Returns the scalar type of the induction.
1841 return Trunc ? Trunc->getType() : IV->getType();
1842 }
1843};
1844
1846 const InductionDescriptor &IndDesc;
1847
1848 bool IsScalarAfterVectorization;
1849
1850public:
1851 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1852 /// Start.
1854 const InductionDescriptor &IndDesc,
1855 bool IsScalarAfterVectorization)
1856 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1857 IndDesc(IndDesc),
1858 IsScalarAfterVectorization(IsScalarAfterVectorization) {
1859 addOperand(Start);
1860 addOperand(Step);
1861 }
1862
1864
1867 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
1868 IndDesc, IsScalarAfterVectorization);
1869 }
1870
1871 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1872
1873 /// Generate vector values for the pointer induction.
1874 void execute(VPTransformState &State) override;
1875
1876 /// Returns true if only scalar values will be generated.
1877 bool onlyScalarsGenerated(bool IsScalable);
1878
1879 /// Returns the induction descriptor for the recipe.
1880 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1881
1882#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1883 /// Print the recipe.
1884 void print(raw_ostream &O, const Twine &Indent,
1885 VPSlotTracker &SlotTracker) const override;
1886#endif
1887};
1888
1889/// A recipe for handling phis that are widened in the vector loop.
1890/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1891/// managed in the recipe directly.
1893 /// List of incoming blocks. Only used in the VPlan native path.
1894 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1895
1896public:
1897 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
/// The recipe is created with no operands; \p Start, if non-null, is then
/// added as the first operand. No entry is added to IncomingBlocks for it.
1898 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1899 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
1900 if (Start)
1901 addOperand(Start);
1902 }
1903
1905 llvm_unreachable("cloning not implemented yet");
1906 }
1907
1908 ~VPWidenPHIRecipe() override = default;
1909
1910 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1911
1912 /// Generate the phi/select nodes.
1913 void execute(VPTransformState &State) override;
1914
1915#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1916 /// Print the recipe.
1917 void print(raw_ostream &O, const Twine &Indent,
1918 VPSlotTracker &SlotTracker) const override;
1919#endif
1920
1921 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi: \p IncomingV is
/// appended to the operands and \p IncomingBlock to IncomingBlocks.
/// NOTE(review): the constructor can add a start operand without a matching
/// block, in which case operand and block indices would no longer line up --
/// confirm that callers use either a start value or addIncoming, not both.
1922 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1923 addOperand(IncomingV);
1924 IncomingBlocks.push_back(IncomingBlock);
1925 }
1926
1927 /// Returns the \p I th incoming VPBasicBlock.
1928 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1929
1930 /// Returns the \p I th incoming VPValue.
1931 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
1932};
1933
1934/// A recipe for handling first-order recurrence phis. The start value is the
1935/// first operand of the recipe and the incoming value from the backedge is the
1936/// second operand.
1939 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1940
1941 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1942
1944 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1945 }
1946
1949 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
1950 }
1951
1952 void execute(VPTransformState &State) override;
1953
1954#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1955 /// Print the recipe.
1956 void print(raw_ostream &O, const Twine &Indent,
1957 VPSlotTracker &SlotTracker) const override;
1958#endif
1959};
1960
1961/// A recipe for handling reduction phis. The start value is the first operand
1962/// of the recipe and the incoming value from the backedge is the second
1963/// operand.
1965 /// Descriptor for the reduction.
1966 const RecurrenceDescriptor &RdxDesc;
1967
1968 /// The phi is part of an in-loop reduction.
1969 bool IsInLoop;
1970
1971 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1972 bool IsOrdered;
1973
1974public:
1975 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1976 /// RdxDesc.
1978 VPValue &Start, bool IsInLoop = false,
1979 bool IsOrdered = false)
1980 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1981 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
1982 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
1983 }
1984
1985 ~VPReductionPHIRecipe() override = default;
1986
1988 auto *R =
1989 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
1990 *getOperand(0), IsInLoop, IsOrdered);
1991 R->addOperand(getBackedgeValue());
1992 return R;
1993 }
1994
1995 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1996
1998 return R->getVPDefID() == VPDef::VPReductionPHISC;
1999 }
2000
2001 /// Generate the phi/select nodes.
2002 void execute(VPTransformState &State) override;
2003
2004#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2005 /// Print the recipe.
2006 void print(raw_ostream &O, const Twine &Indent,
2007 VPSlotTracker &SlotTracker) const override;
2008#endif
2009
2011 return RdxDesc;
2012 }
2013
2014 /// Returns true, if the phi is part of an ordered reduction.
2015 bool isOrdered() const { return IsOrdered; }
2016
2017 /// Returns true, if the phi is part of an in-loop reduction.
2018 bool isInLoop() const { return IsInLoop; }
2019};
2020
2021/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2022/// instructions.
2024public:
2025 /// The blend operation is a User of the incoming values and of their
2026 /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
2027 /// incoming value does not have a mask associated.
2029 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2030 assert((Operands.size() + 1) % 2 == 0 &&
2031 "Expected an odd number of operands");
2032 }
2033
2034 VPBlendRecipe *clone() override {
2036 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2037 }
2038
2039 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2040
2041 /// Return the number of incoming values, taking into account that the first
2042 /// incoming value has no mask.
2043 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
2044
2045 /// Return incoming value number \p Idx (operands are [I0, I1, M1, I2, M2, ...]).
2046 VPValue *getIncomingValue(unsigned Idx) const {
2047 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1); // I_k is operand 2k-1 for k > 0.
2048 }
2049
2050 /// Return mask number \p Idx; \p Idx must be > 0 as the first incoming value has no mask.
2051 VPValue *getMask(unsigned Idx) const {
2052 assert(Idx > 0 && "First index has no mask associated.");
2053 return getOperand(Idx * 2); // M_k directly follows I_k, i.e. operand 2k.
2054 }
2055
2056 /// Generate the phi/select nodes.
2057 void execute(VPTransformState &State) override;
2058
2059#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2060 /// Print the recipe.
2061 void print(raw_ostream &O, const Twine &Indent,
2062 VPSlotTracker &SlotTracker) const override;
2063#endif
2064
2065 /// Returns true if the recipe only uses the first lane of operand \p Op.
2066 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2068 "Op must be an operand of the recipe");
2069 // Recursing through Blend recipes only; this must terminate at header phis
2070 // at the latest.
2071 return all_of(users(),
2072 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2073 }
2074};
2075
2076/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2077/// or stores into one wide load/store and shuffles. The first operand of a
2078/// VPInterleave recipe is the address, followed by the stored values, followed
2079/// by an optional mask.
2082
2083 /// Indicates if the interleave group is in a conditional block and requires a
2084 /// mask.
2085 bool HasMask = false;
2086
2087 /// Indicates if gaps between members of the group need to be masked out or if
2088 /// unused gaps can be loaded speculatively.
2089 bool NeedsMaskForGaps = false;
2090
2091public:
2093 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2094 bool NeedsMaskForGaps)
2095 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2096 NeedsMaskForGaps(NeedsMaskForGaps) {
2097 for (unsigned i = 0; i < IG->getFactor(); ++i)
2098 if (Instruction *I = IG->getMember(i)) {
2099 if (I->getType()->isVoidTy())
2100 continue;
2101 new VPValue(I, this);
2102 }
2103
2104 for (auto *SV : StoredValues)
2105 addOperand(SV);
2106 if (Mask) {
2107 HasMask = true;
2108 addOperand(Mask);
2109 }
2110 }
2111 ~VPInterleaveRecipe() override = default;
2112
2114 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2115 NeedsMaskForGaps);
2116 }
2117
2118 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2119
2120 /// Return the address accessed by this recipe.
2121 VPValue *getAddr() const {
2122 return getOperand(0); // Address is the 1st, mandatory operand.
2123 }
2124
2125 /// Return the mask used by this recipe. Note that a full mask is represented
2126 /// by a nullptr.
2127 VPValue *getMask() const {
2128 // Mask is optional and, when present, is always the last operand.
2129 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2130 }
2131
2132 /// Return the VPValues stored by this interleave group. If it is a load
2133 /// interleave group, return an empty ArrayRef.
2135 // The first operand is the address, followed by the stored values, followed
2136 // by an optional mask.
2139 }
2140
2141 /// Generate the wide load or store, and shuffles.
2142 void execute(VPTransformState &State) override;
2143
2144#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2145 /// Print the recipe.
2146 void print(raw_ostream &O, const Twine &Indent,
2147 VPSlotTracker &SlotTracker) const override;
2148#endif
2149
2151
2152 /// Returns the number of stored operands of this interleave group. Returns 0
2153 /// for load interleave groups.
2154 unsigned getNumStoreOperands() const {
2155 return getNumOperands() - (HasMask ? 2 : 1); // Exclude the address and, if present, the mask.
2156 }
2157
2158 /// The recipe only uses the first lane of the address.
2159 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2161 "Op must be an operand of the recipe");
2162 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2163 }
2164
2165 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2166};
2167
2168/// A recipe to represent inloop reduction operations, performing a reduction on
2169/// a vector operand into a scalar value, and adding the result to a chain.
2170/// The Operands are {ChainOp, VecOp, [Condition]}.
2172 /// The recurrence descriptor for the reduction in question.
2173 const RecurrenceDescriptor &RdxDesc;
2174 bool IsOrdered;
2175 /// Whether the reduction is conditional.
2176 bool IsConditional = false;
2177
2178protected:
2179 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2181 VPValue *CondOp, bool IsOrdered)
2182 : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
2183 if (CondOp) {
2184 IsConditional = true;
2185 addOperand(CondOp);
2186 }
2187 }
2188
2189public:
2191 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2192 bool IsOrdered)
2193 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2194 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2195 IsOrdered) {}
2196
2197 ~VPReductionRecipe() override = default;
2198
2200 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2201 getVecOp(), getCondOp(), IsOrdered);
2202 }
2203
2204 static inline bool classof(const VPRecipeBase *R) {
2205 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2206 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2207 }
2208
2209 static inline bool classof(const VPUser *U) {
2210 auto *R = dyn_cast<VPRecipeBase>(U);
2211 return R && classof(R);
2212 }
2213
2214 /// Generate the reduction in the loop
2215 void execute(VPTransformState &State) override;
2216
2217#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2218 /// Print the recipe.
2219 void print(raw_ostream &O, const Twine &Indent,
2220 VPSlotTracker &SlotTracker) const override;
2221#endif
2222
2223 /// Return the recurrence descriptor for the in-loop reduction.
2225 return RdxDesc;
2226 }
2227 /// Return true if the in-loop reduction is ordered.
2228 bool isOrdered() const { return IsOrdered; }
2229 /// Return true if the in-loop reduction is conditional.
2230 bool isConditional() const { return IsConditional; }
2231 /// The VPValue of the scalar Chain being accumulated.
2232 VPValue *getChainOp() const { return getOperand(0); }
2233 /// The VPValue of the vector value to be reduced.
2234 VPValue *getVecOp() const { return getOperand(1); }
2235 /// The VPValue of the condition for the block.
2237 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2238 }
2239};
2240
2241/// A recipe to represent inloop reduction operations with vector-predication
2242/// intrinsics, performing a reduction on a vector operand with the explicit
2243/// vector length (EVL) into a scalar value, and adding the result to a chain.
2244/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2246public:
2249 VPDef::VPReductionEVLSC, R->getRecurrenceDescriptor(),
2251 ArrayRef<VPValue *>({R->getChainOp(), R->getVecOp(), EVL}), CondOp,
2252 R->isOrdered()) {}
2253
2254 ~VPReductionEVLRecipe() override = default;
2255
2257 llvm_unreachable("cloning not implemented yet");
2258 }
2259
2260 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2261
2262 /// Generate the reduction in the loop
2263 void execute(VPTransformState &State) override;
2264
2265#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2266 /// Print the recipe.
2267 void print(raw_ostream &O, const Twine &Indent,
2268 VPSlotTracker &SlotTracker) const override;
2269#endif
2270
2271 /// The VPValue of the explicit vector length.
2272 VPValue *getEVL() const { return getOperand(2); }
2273
2274 /// Returns true if the recipe only uses the first lane of operand \p Op.
2275 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2277 "Op must be an operand of the recipe");
2278 return Op == getEVL();
2279 }
2280};
2281
2282/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2283/// copies of the original scalar type, one per lane, instead of producing a
2284/// single copy of widened type for all lanes. If the instruction is known to be
2285/// uniform only one copy, per lane zero, will be generated.
2287 /// Indicator if only a single replica, for lane zero, is needed.
2288 bool IsUniform;
2289
2290 /// Indicator if the replicas are also predicated.
2291 bool IsPredicated;
2292
2293public:
2294 template <typename IterT>
2296 bool IsUniform, VPValue *Mask = nullptr)
2297 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2298 IsUniform(IsUniform), IsPredicated(Mask) {
2299 if (Mask)
2300 addOperand(Mask);
2301 }
2302
2303 ~VPReplicateRecipe() override = default;
2304
2306 auto *Copy =
2307 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2308 isPredicated() ? getMask() : nullptr);
2309 Copy->transferFlags(*this);
2310 return Copy;
2311 }
2312
2313 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2314
2315 /// Generate replicas of the desired Ingredient. Replicas will be generated
2316 /// for all parts and lanes unless a specific part and lane are specified in
2317 /// the \p State.
2318 void execute(VPTransformState &State) override;
2319
2320#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2321 /// Print the recipe.
2322 void print(raw_ostream &O, const Twine &Indent,
2323 VPSlotTracker &SlotTracker) const override;
2324#endif
2325
2326 bool isUniform() const { return IsUniform; }
2327
2328 bool isPredicated() const { return IsPredicated; }
2329
2330 /// Returns true if the recipe only uses the first lane of operand \p Op.
2331 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2333 "Op must be an operand of the recipe");
2334 return isUniform();
2335 }
2336
2337 /// Returns true if the recipe uses scalars of operand \p Op.
2338 bool usesScalars(const VPValue *Op) const override {
2340 "Op must be an operand of the recipe");
2341 return true;
2342 }
2343
2344 /// Returns true if the recipe is used by a widened recipe via an intervening
2345 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2346 /// in a vector.
2347 bool shouldPack() const;
2348
2349 /// Return the mask of a predicated VPReplicateRecipe.
2351 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2352 return getOperand(getNumOperands() - 1);
2353 }
2354
2355 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2356};
2357
2358/// A recipe for generating conditional branches on the bits of a mask.
2360public:
2362 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2363 if (BlockInMask) // nullptr means all-one mask.
2364 addOperand(BlockInMask);
2365 }
2366
2368 return new VPBranchOnMaskRecipe(getMask()); // Mask is optional; getMask() yields nullptr when absent.
2369 }
2370
2371 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2372
2373 /// Generate the extraction of the appropriate bit from the block mask and the
2374 /// conditional branch.
2375 void execute(VPTransformState &State) override;
2376
2377#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2378 /// Print the recipe as "BRANCH-ON-MASK " followed by its mask operand.
2379 void print(raw_ostream &O, const Twine &Indent,
2380 VPSlotTracker &SlotTracker) const override {
2381 O << Indent << "BRANCH-ON-MASK ";
2382 if (VPValue *Mask = getMask())
2383 Mask->printAsOperand(O, SlotTracker);
2384 else
2385 O << " All-One"; // No mask operand: getMask() returned nullptr.
2386 }
2387#endif
2388
2389 /// Return the mask used by this recipe. Note that a full mask is represented
2390 /// by a nullptr.
2391 VPValue *getMask() const {
2392 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2393 // Mask is optional.
2394 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2395 }
2396
2397 /// Returns true if the recipe uses scalars of operand \p Op.
2398 bool usesScalars(const VPValue *Op) const override {
2400 "Op must be an operand of the recipe");
2401 return true;
2402 }
2403};
2404
2405/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2406/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2407/// order to merge values that are set under such a branch and feed their uses.
2408/// The phi nodes can be scalar or vector depending on the users of the value.
2409/// This recipe works in concert with VPBranchOnMaskRecipe.
2411public:
2412 /// Construct a VPPredInstPHIRecipe given \p PredInst whose value needs phi
2413 /// nodes after merging back from a Branch-on-Mask.
2415 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
2416 ~VPPredInstPHIRecipe() override = default;
2417
2419 return new VPPredInstPHIRecipe(getOperand(0));
2420 }
2421
2422 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2423
2424 /// Generates phi nodes for live-outs as needed to retain SSA form.
2425 void execute(VPTransformState &State) override;
2426
2427#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2428 /// Print the recipe.
2429 void print(raw_ostream &O, const Twine &Indent,
2430 VPSlotTracker &SlotTracker) const override;
2431#endif
2432
2433 /// Returns true if the recipe uses scalars of operand \p Op.
2434 bool usesScalars(const VPValue *Op) const override {
2436 "Op must be an operand of the recipe");
2437 return true;
2438 }
2439};
2440
2441/// A common base class for widening memory operations. An optional mask can be
2442/// provided as the last operand.
2444protected:
2446
2447 /// Whether the accessed addresses are consecutive.
2449
2450 /// Whether the consecutive accessed addresses are in reverse order.
2452
2453 /// Whether the memory access is masked.
2454 bool IsMasked = false;
2455
2456 void setMask(VPValue *Mask) { // Append the optional \p Mask as an operand; may be done once.
2457 assert(!IsMasked && "cannot re-set mask");
2458 if (!Mask) // Null mask: add no operand and leave IsMasked false.
2459 return;
2460 addOperand(Mask);
2461 IsMasked = true;
2462 }
2463
2464 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2465 std::initializer_list<VPValue *> Operands,
2466 bool Consecutive, bool Reverse, DebugLoc DL)
2468 Reverse(Reverse) {
2469 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2470 }
2471
2472public:
2474 llvm_unreachable("cloning not supported");
2475 }
2476
2477 static inline bool classof(const VPRecipeBase *R) {
2478 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2479 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2480 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2481 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2482 }
2483
2484 static inline bool classof(const VPUser *U) {
2485 auto *R = dyn_cast<VPRecipeBase>(U);
2486 return R && classof(R);
2487 }
2488
2489 /// Return whether the loaded-from / stored-to addresses are consecutive.
2490 bool isConsecutive() const { return Consecutive; }
2491
2492 /// Return whether the consecutive loaded/stored addresses are in reverse
2493 /// order.
2494 bool isReverse() const { return Reverse; }
2495
2496 /// Return the address accessed by this recipe.
2497 VPValue *getAddr() const { return getOperand(0); }
2498
2499 /// Returns true if the recipe is masked.
2500 bool isMasked() const { return IsMasked; }
2501
2502 /// Return the mask used by this recipe. Note that a full mask is represented
2503 /// by a nullptr.
2504 VPValue *getMask() const {
2505 // Mask is optional and therefore the last operand.
2506 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2507 }
2508
2509 /// Generate the wide load/store.
2510 void execute(VPTransformState &State) override {
2511 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2512 }
2513
2515};
2516
2517/// A recipe for widening load operations, using the address to load from and an
2518/// optional mask.
2519struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2521 bool Consecutive, bool Reverse, DebugLoc DL)
2522 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2523 Reverse, DL),
2524 VPValue(this, &Load) {
2525 setMask(Mask);
2526 }
2527
2529 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2531 getDebugLoc());
2532 }
2533
2534 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2535
2536 /// Generate a wide load or gather.
2537 void execute(VPTransformState &State) override;
2538
2539#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2540 /// Print the recipe.
2541 void print(raw_ostream &O, const Twine &Indent,
2542 VPSlotTracker &SlotTracker) const override;
2543#endif
2544
2545 /// Returns true if the recipe only uses the first lane of operand \p Op.
2546 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2548 "Op must be an operand of the recipe");
2549 // Widened, consecutive load operations only demand the first lane of
2550 // their address.
2551 return Op == getAddr() && isConsecutive();
2552 }
2553};
2554
2555/// A recipe for widening load operations with vector-predication intrinsics,
2556/// using the address to load from, the explicit vector length and an optional
2557/// mask.
2558struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
2560 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
2561 {L->getAddr(), EVL}, L->isConsecutive(),
2562 L->isReverse(), L->getDebugLoc()),
2563 VPValue(this, &getIngredient()) {
2564 setMask(Mask);
2565 }
2566
2567 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
2568
2569 /// Return the EVL operand.
2570 VPValue *getEVL() const { return getOperand(1); }
2571
2572 /// Generate the wide load or gather.
2573 void execute(VPTransformState &State) override;
2574
2575#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2576 /// Print the recipe.
2577 void print(raw_ostream &O, const Twine &Indent,
2578 VPSlotTracker &SlotTracker) const override;
2579#endif
2580
2581 /// Returns true if the recipe only uses the first lane of operand \p Op.
2582 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2584 "Op must be an operand of the recipe");
2585 // Widened loads only demand the first lane of EVL and consecutive loads
2586 // only demand the first lane of their address.
2587 return Op == getEVL() || (Op == getAddr() && isConsecutive());
2588 }
2589};
2590
2591/// A recipe for widening store operations, using the stored value, the address
2592/// to store to and an optional mask.
2595 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2596 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2598 setMask(Mask);
2599 }
2600
2602 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
2604 Reverse, getDebugLoc());
2605 }
2606
2607 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
2608
2609 /// Return the value stored by this recipe.
2610 VPValue *getStoredValue() const { return getOperand(1); }
2611
2612 /// Generate a wide store or scatter.
2613 void execute(VPTransformState &State) override;
2614
2615#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2616 /// Print the recipe.
2617 void print(raw_ostream &O, const Twine &Indent,
2618 VPSlotTracker &SlotTracker) const override;
2619#endif
2620
2621 /// Returns true if the recipe only uses the first lane of operand \p Op.
2622 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2624 "Op must be an operand of the recipe");
2625 // Widened, consecutive stores only demand the first lane of their address,
2626 // unless the same operand is also stored.
2627 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2628 }
2629};
2630
2631/// A recipe for widening store operations with vector-predication intrinsics,
2632/// using the value to store, the address to store to, the explicit vector
2633/// length and an optional mask.
2636 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
2637 {S->getAddr(), S->getStoredValue(), EVL},
2638 S->isConsecutive(), S->isReverse(),
2639 S->getDebugLoc()) {
2640 setMask(Mask);
2641 }
2642
2643 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
2644
2645 /// Return the value stored by this recipe.
2646 VPValue *getStoredValue() const { return getOperand(1); }
2647
2648 /// Return the EVL operand.
2649 VPValue *getEVL() const { return getOperand(2); }
2650
2651 /// Generate the wide store or scatter.
2652 void execute(VPTransformState &State) override;
2653
2654#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2655 /// Print the recipe.
2656 void print(raw_ostream &O, const Twine &Indent,
2657 VPSlotTracker &SlotTracker) const override;
2658#endif
2659
2660 /// Returns true if the recipe only uses the first lane of operand \p Op.
2661 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2663 "Op must be an operand of the recipe");
2664 if (Op == getEVL()) {
2665 assert(getStoredValue() != Op && "unexpected store of EVL");
2666 return true;
2667 }
2668 // Widened, consecutive memory operations only demand the first lane of
2669 // their address, unless the same operand is also stored. The latter can
2670 // happen with opaque pointers.
2671 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2672 }
2673};
2674
2675/// Recipe to expand a SCEV expression.
2677 const SCEV *Expr;
2678 ScalarEvolution &SE;
2679
2680public:
2682 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
2683
2684 ~VPExpandSCEVRecipe() override = default;
2685
2687 return new VPExpandSCEVRecipe(Expr, SE);
2688 }
2689
2690 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
2691
2692 /// Generate the expansion of the SCEV expression held by this recipe.
2693 void execute(VPTransformState &State) override;
2694
2695#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2696 /// Print the recipe.
2697 void print(raw_ostream &O, const Twine &Indent,
2698 VPSlotTracker &SlotTracker) const override;
2699#endif
2700
2701 const SCEV *getSCEV() const { return Expr; }
2702};
2703
2704/// Canonical scalar induction phi of the vector loop. Starting at the specified
2705/// start value (either 0 or the resume value when vectorizing the epilogue
2706/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
2707/// canonical induction variable.
2709public:
2711 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
2712
2713 ~VPCanonicalIVPHIRecipe() override = default;
2714
2716 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
2717 R->addOperand(getBackedgeValue());
2718 return R;
2719 }
2720
2721 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
2722
2724 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
2725 }
2726
2727 /// Generate the canonical scalar induction phi of the vector loop.
2728 void execute(VPTransformState &State) override;
2729
2730#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2731 /// Print the recipe.
2732 void print(raw_ostream &O, const Twine &Indent,
2733 VPSlotTracker &SlotTracker) const override;
2734#endif
2735
2736 /// Returns the scalar type of the induction.
2738 return getStartValue()->getLiveInIRValue()->getType();
2739 }
2740
2741 /// Returns true if the recipe only uses the first lane of operand \p Op.
2742 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2744 "Op must be an operand of the recipe");
2745 return true;
2746 }
2747
2748 /// Returns true if the recipe only uses the first part of operand \p Op.
2749 bool onlyFirstPartUsed(const VPValue *Op) const override {
2751 "Op must be an operand of the recipe");
2752 return true;
2753 }
2754
2755 /// Check if the induction described by \p Kind, \p Start and \p Step is
2756 /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
2758 VPValue *Step) const;
2759};
2760
2761/// A recipe for generating the active lane mask for the vector loop that is
2762/// used to predicate the vector operations.
2763/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2764/// remove VPActiveLaneMaskPHIRecipe.
2766public:
2768 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
2769 DL) {}
2770
2771 ~VPActiveLaneMaskPHIRecipe() override = default;
2772
2775 }
2776
2777 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
2778
2780 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
2781 }
2782
2783 /// Generate the active lane mask phi of the vector loop.
2784 void execute(VPTransformState &State) override;
2785
2786#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2787 /// Print the recipe.
2788 void print(raw_ostream &O, const Twine &Indent,
2789 VPSlotTracker &SlotTracker) const override;
2790#endif
2791};
2792
2793/// A recipe for generating the phi node for the current index of elements,
2794/// adjusted in accordance with EVL value. It starts at the start value of the
2795/// canonical induction and gets incremented by EVL in each iteration of the
2796/// vector loop.
2798public:
2800 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
2801
2802 ~VPEVLBasedIVPHIRecipe() override = default;
2803
2805 llvm_unreachable("cloning not implemented yet");
2806 }
2807
2808 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
2809
2811 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
2812 }
2813
2814 /// Generate phi for handling IV based on EVL over iterations correctly.
2815 /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
2816 void execute(VPTransformState &State) override;
2817
2818 /// Returns true if the recipe only uses the first lane of operand \p Op.
2819 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2821 "Op must be an operand of the recipe");
2822 return true;
2823 }
2824
2825#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2826 /// Print the recipe.
2827 void print(raw_ostream &O, const Twine &Indent,
2828 VPSlotTracker &SlotTracker) const override;
2829#endif
2830};
2831
2832/// A Recipe for widening the canonical induction variable of the vector loop.
2834public:
2836 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
2837
2838 ~VPWidenCanonicalIVRecipe() override = default;
2839
2841 return new VPWidenCanonicalIVRecipe(
2842 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
2843 }
2844
2845 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
2846
2847 /// Generate a canonical vector induction variable of the vector loop, with
2848 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
2849 /// step = <VF*UF, VF*UF, ..., VF*UF>.
2850 void execute(VPTransformState &State) override;
2851
2852#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2853 /// Print the recipe.
2854 void print(raw_ostream &O, const Twine &Indent,
2855 VPSlotTracker &SlotTracker) const override;
2856#endif
2857};
2858
2859 /// A recipe for converting the input value \p IV to the corresponding
2860/// value of an IV with different start and step values, using Start + IV *
2861/// Step.
2863 /// Kind of the induction.
2865 /// If not nullptr, the floating point induction binary operator. Must be set
2866 /// for floating point inductions.
2867 const FPMathOperator *FPBinOp;
2868
2869public:
2871 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
2873 IndDesc.getKind(),
2874 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
2875 Start, CanonicalIV, Step) {}
2876
2878 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
2879 VPValue *Step)
2880 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
2881 FPBinOp(FPBinOp) {}
2882
2883 ~VPDerivedIVRecipe() override = default;
2884
2886 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
2887 getStepValue());
2888 }
2889
2890 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
2891
2892 /// Generate the transformed value of the induction at offset StartValue (1st
2893 /// operand) + IV (2nd operand) * StepValue (3rd operand).
2894 void execute(VPTransformState &State) override;
2895
2896#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2897 /// Print the recipe.
2898 void print(raw_ostream &O, const Twine &Indent,
2899 VPSlotTracker &SlotTracker) const override;
2900#endif
2901
2903 return getStartValue()->getLiveInIRValue()->getType();
2904 }
2905
2906 VPValue *getStartValue() const { return getOperand(0); }
2907 VPValue *getStepValue() const { return getOperand(2); }
2908
2909 /// Returns true if the recipe only uses the first lane of operand \p Op.
2910 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2912 "Op must be an operand of the recipe");
2913 return true;
2914 }
2915};
2916
2917/// A recipe for handling phi nodes of integer and floating-point inductions,
2918/// producing their scalar values.
2920 Instruction::BinaryOps InductionOpcode;
2921
2922public:
2925 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
2926 ArrayRef<VPValue *>({IV, Step}), FMFs),
2927 InductionOpcode(Opcode) {}
2928
2930 VPValue *Step)
2932 IV, Step, IndDesc.getInductionOpcode(),
2933 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
2934 ? IndDesc.getInductionBinOp()->getFastMathFlags()
2935 : FastMathFlags()) {}
2936
2937 ~VPScalarIVStepsRecipe() override = default;
2938
2940 return new VPScalarIVStepsRecipe(
2941 getOperand(0), getOperand(1), InductionOpcode,
2943 }
2944
2945 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
2946
2947 /// Generate the scalarized versions of the phi node as needed by their users.
2948 void execute(VPTransformState &State) override;
2949
2950#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2951 /// Print the recipe.
2952 void print(raw_ostream &O, const Twine &Indent,
2953 VPSlotTracker &SlotTracker) const override;
2954#endif
2955
2956 VPValue *getStepValue() const { return getOperand(1); }
2957
2958 /// Returns true if the recipe only uses the first lane of operand \p Op.
2959 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2961 "Op must be an operand of the recipe");
2962 return true;
2963 }
2964};
2965
2966/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
2967/// holds a sequence of zero or more VPRecipe's each representing a sequence of
2968/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
2970public:
2972
2973protected:
2974 /// The VPRecipes held in the order of output instructions to generate.
2976
2977 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
2978 : VPBlockBase(BlockSC, Name.str()) {}
2979
2980public:
2981 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
2982 : VPBlockBase(VPBasicBlockSC, Name.str()) {
2983 if (Recipe)
2984 appendRecipe(Recipe);
2985 }
2986
/// Destroy the block, removing its recipes one at a time starting with the
/// last recipe and working towards the first.
/// NOTE(review): presumably the back-to-front order is deliberate, so that
/// recipes are removed before the earlier recipes they use -- confirm before
/// replacing this loop with a bulk clear.
2987 ~VPBasicBlock() override {
2988 while (!Recipes.empty())
2989 Recipes.pop_back();
2990 }
2991
2992 /// Instruction iterators...
2997
2998 //===--------------------------------------------------------------------===//
2999 /// Recipe iterator methods
3000 ///
3001 inline iterator begin() { return Recipes.begin(); }
3002 inline const_iterator begin() const { return Recipes.begin(); }
3003 inline iterator end() { return Recipes.end(); }
3004 inline const_iterator end() const { return Recipes.end(); }
3005
3006 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3007 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3008 inline reverse_iterator rend() { return Recipes.rend(); }
3009 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3010
3011 inline size_t size() const { return Recipes.size(); }
3012 inline bool empty() const { return Recipes.empty(); }
3013 inline const VPRecipeBase &front() const { return Recipes.front(); }
3014 inline VPRecipeBase &front() { return Recipes.front(); }
3015 inline const VPRecipeBase &back() const { return Recipes.back(); }
3016 inline VPRecipeBase &back() { return Recipes.back(); }
3017
3018 /// Returns a reference to the list of recipes.
3020
3021 /// Returns a pointer to a member of the recipe list.
3023 return &VPBasicBlock::Recipes;
3024 }
3025
3026 /// Method to support type inquiry through isa, cast, and dyn_cast.
3027 static inline bool classof(const VPBlockBase *V) {
3028 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3029 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3030 }
3031
/// Insert \p Recipe into this block's recipe list at \p InsertPt and record
/// this block as the recipe's parent. Passing end() as \p InsertPt makes the
/// recipe the last one, which is how appendRecipe() uses this method.
/// \p Recipe must be non-null and must not have a parent yet.
3032 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3033 assert(Recipe && "No recipe to append.");
3034 assert(!Recipe->Parent && "Recipe already in VPlan");
// The parent is set before the recipe is linked into the list.
3035 Recipe->Parent = this;
3036 Recipes.insert(InsertPt, Recipe);
3037 }
3038
3039 /// Augment the existing recipes of a VPBasicBlock with an additional
3040 /// \p Recipe as the last recipe.
3041 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3042
3043 /// The method which generates the output IR instructions that correspond to
3044 /// this VPBasicBlock, thereby "executing" the VPlan.
3045 void execute(VPTransformState *State) override;
3046
3047 /// Return the cost of this VPBasicBlock.
3049
3050 /// Return the position of the first non-phi node recipe in the block.
3052
3053 /// Returns an iterator range over the PHI-like recipes in the block.
3055 return make_range(begin(), getFirstNonPhi());
3056 }
3057
3058 void dropAllReferences(VPValue *NewValue) override;
3059
3060 /// Split current block at \p SplitAt by inserting a new block between the
3061 /// current block and its successors and moving all recipes starting at
3062 /// SplitAt to the new block. Returns the new block.
3063 VPBasicBlock *splitAt(iterator SplitAt);
3064
3066
3067#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3068 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3069 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3070 ///
3071 /// Note that the numbering is applied to the whole VPlan, so printing
3072 /// individual blocks is consistent with the whole VPlan printing.
3073 void print(raw_ostream &O, const Twine &Indent,
3074 VPSlotTracker &SlotTracker) const override;
3075 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3076#endif
3077
3078 /// If the block has multiple successors, return the branch recipe terminating
3079 /// the block. If there is no successor or only a single one, return nullptr.
3081 const VPRecipeBase *getTerminator() const;
3082
3083 /// Returns true if the block is exiting its parent region.
3084 bool isExiting() const;
3085
3086 /// Clone the current block and it's recipes, without updating the operands of
3087 /// the cloned recipes.
3088 VPBasicBlock *clone() override {
3089 auto *NewBlock = new VPBasicBlock(getName());
3090 for (VPRecipeBase &R : *this)
3091 NewBlock->appendRecipe(R.clone());
3092 return NewBlock;
3093 }
3094
3095protected:
3096 /// Execute the recipes in the IR basic block \p BB.
3097 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3098
3099private:
3100 /// Create an IR BasicBlock to hold the output instructions generated by this
3101 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3102 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3103};
3104
3105/// A special type of VPBasicBlock that wraps an existing IR basic block.
3106/// Recipes of the block get added before the first non-phi instruction in the
3107/// wrapped block.
3108/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3109/// preheader block.
3111 BasicBlock *IRBB;
3112
3113public:
3115 : VPBasicBlock(VPIRBasicBlockSC,
3116 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3117 IRBB(IRBB) {}
3118
3119 ~VPIRBasicBlock() override {}
3120
3121 static inline bool classof(const VPBlockBase *V) {
3122 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3123 }
3124
3125 /// The method which generates the output IR instructions that correspond to
3126 /// this VPBasicBlock, thereby "executing" the VPlan.
3127 void execute(VPTransformState *State) override;
3128
3129 VPIRBasicBlock *clone() override {
3130 auto *NewBlock = new VPIRBasicBlock(IRBB);
3131 for (VPRecipeBase &R : Recipes)
3132 NewBlock->appendRecipe(R.clone());
3133 return NewBlock;
3134 }
3135
3136 BasicBlock *getIRBasicBlock() const { return IRBB; }
3137};
3138
3139/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3140/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3141/// A VPRegionBlock may indicate that its contents are to be replicated several
3142/// times. This is designed to support predicated scalarization, in which a
3143/// scalar if-then code structure needs to be generated VF * UF times. Having
3144/// this replication indicator helps to keep a single model for multiple
3145/// candidate VF's. The actual replication takes place only once the desired VF
3146/// and UF have been determined.
3148 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3149 VPBlockBase *Entry;
3150
3151 /// Hold the Single Exiting block of the SESE region modelled by the
3152 /// VPRegionBlock.
3153 VPBlockBase *Exiting;
3154
3155 /// An indicator whether this region is to generate multiple replicated
3156 /// instances of output IR corresponding to its VPBlockBases.
3157 bool IsReplicator;
3158
3159public:
3161 const std::string &Name = "", bool IsReplicator = false)
3162 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3163 IsReplicator(IsReplicator) {
3164 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3165 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3166 Entry->setParent(this);
3167 Exiting->setParent(this);
3168 }
3169 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3170 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3171 IsReplicator(IsReplicator) {}
3172
3173 ~VPRegionBlock() override {
3174 if (Entry) {
3175 VPValue DummyValue;
3176 Entry->dropAllReferences(&DummyValue);
3177 deleteCFG(Entry);
3178 }
3179 }
3180
3181 /// Method to support type inquiry through isa, cast, and dyn_cast.
3182 static inline bool classof(const VPBlockBase *V) {
3183 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3184 }
3185
3186 const VPBlockBase *getEntry() const { return Entry; }
3187 VPBlockBase *getEntry() { return Entry; }
3188
3189 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3190 /// EntryBlock must have no predecessors.
3191 void setEntry(VPBlockBase *EntryBlock) {
3192 assert(EntryBlock->getPredecessors().empty() &&
3193 "Entry block cannot have predecessors.");
3194 Entry = EntryBlock;
3195 EntryBlock->setParent(this);
3196 }
3197
3198 const VPBlockBase *getExiting() const { return Exiting; }
3199 VPBlockBase *getExiting() { return Exiting; }
3200
3201 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3202 /// ExitingBlock must have no successors.
3203 void setExiting(VPBlockBase *ExitingBlock) {
3204 assert(ExitingBlock->getSuccessors().empty() &&
3205 "Exit block cannot have successors.");
3206 Exiting = ExitingBlock;
3207 ExitingBlock->setParent(this);
3208 }
3209
3210 /// Returns the pre-header VPBasicBlock of the loop region.
3212 assert(!isReplicator() && "should only get pre-header of loop regions");
3214 }
3215
3216 /// An indicator whether this region is to generate multiple replicated
3217 /// instances of output IR corresponding to its VPBlockBases.
3218 bool isReplicator() const { return IsReplicator; }
3219
3220 /// The method which generates the output IR instructions that correspond to
3221 /// this VPRegionBlock, thereby "executing" the VPlan.
3222 void execute(VPTransformState *State) override;
3223
3224 // Return the cost of this region.
3226
3227 void dropAllReferences(VPValue *NewValue) override;
3228
3229#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3230 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3231 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3232 /// consecutive numbers.
3233 ///
3234 /// Note that the numbering is applied to the whole VPlan, so printing
3235 /// individual regions is consistent with the whole VPlan printing.
3236 void print(raw_ostream &O, const Twine &Indent,
3237 VPSlotTracker &SlotTracker) const override;
3238 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3239#endif
3240
3241 /// Clone all blocks in the single-entry single-exit region of the block and
3242 /// their recipes without updating the operands of the cloned recipes.
3243 VPRegionBlock *clone() override;
3244};
3245
3246 /// VPlan models a candidate for vectorization, encoding various decisions taken
3247/// to produce efficient output IR, including which branches, basic-blocks and
3248/// output IR instructions to generate, and their cost. VPlan holds a
3249/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3250/// VPBasicBlock.
3251class VPlan {
3252 friend class VPlanPrinter;
3253 friend class VPSlotTracker;
3254
3255 /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
3256 /// preheader of the vector loop.
3257 VPBasicBlock *Entry;
3258
3259 /// VPBasicBlock corresponding to the original preheader. Used to place
3260 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3261 /// rest of VPlan execution.
3262 VPBasicBlock *Preheader;
3263
3264 /// Holds the VFs applicable to this VPlan.
3266
3267 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3268 /// any UF.
3270
3271 /// Holds the name of the VPlan, for printing.
3272 std::string Name;
3273
3274 /// Represents the trip count of the original loop, for folding
3275 /// the tail.
3276 VPValue *TripCount = nullptr;
3277
3278 /// Represents the backedge taken count of the original loop, for folding
3279 /// the tail. It equals TripCount - 1.
3280 VPValue *BackedgeTakenCount = nullptr;
3281
3282 /// Represents the vector trip count.
3283 VPValue VectorTripCount;
3284
3285 /// Represents the loop-invariant VF * UF of the vector loop region.
3286 VPValue VFxUF;
3287
3288 /// Holds a mapping between Values and their corresponding VPValue inside
3289 /// VPlan.
3290 Value2VPValueTy Value2VPValue;
3291
3292 /// Contains all the external definitions created for this VPlan. External
3293 /// definitions are VPValues that hold a pointer to their underlying IR.
3294 SmallVector<VPValue *, 16> VPLiveInsToFree;
3295
3296 /// Values used outside the plan. It contains live-outs that need fixing. Any
3297 /// live-out that is fixed outside VPlan needs to be removed. The remaining
3298 /// live-outs are fixed via VPLiveOut::fixPhi.
3300
3301 /// Mapping from SCEVs to the VPValues representing their expansions.
3302 /// NOTE: This mapping is temporary and will be removed once all users have
3303 /// been modeled in VPlan directly.
3304 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3305
3306public:
3307 /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
3308 /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
3309 /// be disconnected, as the bypass blocks between them are not yet modeled in
3310 /// VPlan.
3311 VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3312 : VPlan(Preheader, Entry) {
3313 TripCount = TC;
3314 }
3315
3316 /// Construct a VPlan with original preheader \p Preheader and \p Entry to
3317 /// the plan. At the moment, \p Preheader and \p Entry need to be
3318 /// disconnected, as the bypass blocks between them are not yet modeled in
3319 /// VPlan.
3320 VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3321 : Entry(Entry), Preheader(Preheader) {
3322 Entry->setPlan(this);
3323 Preheader->setPlan(this);
3324 assert(Preheader->getNumSuccessors() == 0 &&
3325 Preheader->getNumPredecessors() == 0 &&
3326 "preheader must be disconnected");
3327 }
3328
3329 ~VPlan();
3330
3331 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3332 /// original scalar pre-header) which contains SCEV expansions that need
3333 /// to happen before the CFG is modified; a VPBasicBlock for the vector
3334 /// pre-header, followed by a region for the vector loop, followed by the
3335 /// middle VPBasicBlock. If a check is needed to guard executing the scalar
3336 /// epilogue loop, it will be added to the middle block, together with
3337 /// VPBasicBlocks for the scalar preheader and exit blocks.
3338 static VPlanPtr createInitialVPlan(const SCEV *TripCount,
3339 ScalarEvolution &PSE,
3340 bool RequiresScalarEpilogueCheck,
3341 bool TailFolded, Loop *TheLoop);
3342
3343 /// Prepare the plan for execution, setting up the required live-in values.
3344 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3345 Value *CanonicalIVStartValue, VPTransformState &State);
3346
3347 /// Generate the IR code for this VPlan.
3348 void execute(VPTransformState *State);
3349
3350 /// Return the cost of this plan.
3352
3353 VPBasicBlock *getEntry() { return Entry; }
3354 const VPBasicBlock *getEntry() const { return Entry; }
3355
3356 /// The trip count of the original loop.
3358 assert(TripCount && "trip count needs to be set before accessing it");
3359 return TripCount;
3360 }
3361
3362 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3363 /// the original trip count have been replaced.
3364 void resetTripCount(VPValue *NewTripCount) {
3365 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3366 "TripCount always must be set");
3367 TripCount = NewTripCount;
3368 }
3369
3370 /// The backedge taken count of the original loop.
3372 if (!BackedgeTakenCount)
3373 BackedgeTakenCount = new VPValue();
3374 return BackedgeTakenCount;
3375 }
3376
3377 /// The vector trip count.
3378 VPValue &getVectorTripCount() { return VectorTripCount; }
3379
3380 /// Returns VF * UF of the vector loop region.
3381 VPValue &getVFxUF() { return VFxUF; }
3382
3383 void addVF(ElementCount VF) { VFs.insert(VF); }
3384
3386 assert(hasVF(VF) && "Cannot set VF not already in plan");
3387 VFs.clear();
3388 VFs.insert(VF);
3389 }
3390
3391 bool hasVF(ElementCount VF) { return VFs.count(VF); }
3393 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3394 }
3395
3396 /// Returns an iterator range over all VFs of the plan.
3399 return {VFs.begin(), VFs.end()};
3400 }
3401
3402 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3403
3404 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3405
/// Restrict the plan to the single unroll factor \p UF. \p UF must be
/// accepted by hasUF(), i.e. the set of UFs is either empty or already
/// contains \p UF.
3406 void setUF(unsigned UF) {
3407 assert(hasUF(UF) && "Cannot set the UF not already in plan");
// Replace whatever UFs were recorded with just the requested one.
3408 UFs.clear();
3409 UFs.insert(UF);
3410 }
3411
3412 /// Return a string with the name of the plan and the applicable VFs and UFs.
3413 std::string getName() const;
3414
3415 void setName(const Twine &newName) { Name = newName.str(); }
3416
3417 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3418 /// yet) for \p V.
3420 assert(V && "Trying to get or add the VPValue of a null Value");
3421 if (!Value2VPValue.count(V)) {
3422 VPValue *VPV = new VPValue(V);
3423 VPLiveInsToFree.push_back(VPV);
3424 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3425 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3426 Value2VPValue[V] = VPV;
3427 }
3428
3429 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3430 assert(Value2VPValue[V]->isLiveIn() &&
3431 "Only live-ins should be in mapping");
3432 return Value2VPValue[V];
3433 }
3434
3435 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3436 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3437
3438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3439 /// Print the live-ins of this VPlan to \p O.
3440 void printLiveIns(raw_ostream &O) const;
3441
3442 /// Print this VPlan to \p O.
3443 void print(raw_ostream &O) const;
3444
3445 /// Print this VPlan in DOT format to \p O.
3446 void printDOT(raw_ostream &O) const;
3447
3448 /// Dump the plan to stderr (for debugging).
3449 LLVM_DUMP_METHOD void dump() const;
3450#endif
3451
3452 /// Returns the VPRegionBlock of the vector loop.
3454 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3455 }
3457 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3458 }
3459
3460 /// Returns the canonical induction recipe of the vector loop.
3463 if (EntryVPBB->empty()) {
3464 // VPlan native path.
3465 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
3466 }
3467 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
3468 }
3469
3470 void addLiveOut(PHINode *PN, VPValue *V);
3471
3473 delete LiveOuts[PN];
3474 LiveOuts.erase(PN);
3475 }
3476
3478 return LiveOuts;
3479 }
3480
3481 VPValue *getSCEVExpansion(const SCEV *S) const {
3482 return SCEVToExpansion.lookup(S);
3483 }
3484
/// Record \p V as the VPValue representing the expansion of \p S. No
/// expansion may have been recorded for \p S before (checked by assertion
/// only; without assertions an existing entry is overwritten).
3485 void addSCEVExpansion(const SCEV *S, VPValue *V) {
3486 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
3487 SCEVToExpansion[S] = V;
3488 }
3489
3490 /// \return The block corresponding to the original preheader.
3491 VPBasicBlock *getPreheader() { return Preheader; }
3492 const VPBasicBlock *getPreheader() const { return Preheader; }
3493
3494 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
3495 /// recipes to refer to the clones, and return it.
3496 VPlan *duplicate();
3497};
3498
3499#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3500/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
3501/// indented and follows the dot format.
3503 raw_ostream &OS;
3504 const VPlan &Plan;
3505 unsigned Depth = 0;
3506 unsigned TabWidth = 2;
3507 std::string Indent;
3508 unsigned BID = 0;
3510
3512
3513 /// Handle indentation.
3514 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
3515
3516 /// Print a given \p Block of the Plan.
3517 void dumpBlock(const VPBlockBase *Block);
3518
3519 /// Print the information related to the CFG edges going out of a given
3520 /// \p Block, followed by printing the successor blocks themselves.
3521 void dumpEdges(const VPBlockBase *Block);
3522
3523 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
3524 /// its successor blocks.
3525 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
3526
3527 /// Print a given \p Region of the Plan.
3528 void dumpRegion(const VPRegionBlock *Region);
3529
3530 unsigned getOrCreateBID(const VPBlockBase *Block) {
3531 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
3532 }
3533
3534 Twine getOrCreateName(const VPBlockBase *Block);
3535
3536 Twine getUID(const VPBlockBase *Block);
3537
3538 /// Print the information related to a CFG edge between two VPBlockBases.
3539 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
3540 const Twine &Label);
3541
3542public:
3544 : OS(O), Plan(P), SlotTracker(&P) {}
3545
3546 LLVM_DUMP_METHOD void dump();
3547};
3548
3550 const Value *V;
3551
3552 VPlanIngredient(const Value *V) : V(V) {}
3553
3554 void print(raw_ostream &O) const;
3555};
3556
3558 I.print(OS);
3559 return OS;
3560}
3561
3563 Plan.print(OS);
3564 return OS;
3565}
3566#endif
3567
3568//===----------------------------------------------------------------------===//
3569// VPlan Utilities
3570//===----------------------------------------------------------------------===//
3571
3572/// Class that provides utilities for VPBlockBases in VPlan.
3574public:
3575 VPBlockUtils() = delete;
3576
3577 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
3578 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
3579 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
3580 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
3581 /// have neither successors nor predecessors.
3582 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
3583 assert(NewBlock->getSuccessors().empty() &&
3584 NewBlock->getPredecessors().empty() &&
3585 "Can't insert new block with predecessors or successors.");
3586 NewBlock->setParent(BlockPtr->getParent());
3587 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
3588 for (VPBlockBase *Succ : Succs) {
3589 disconnectBlocks(BlockPtr, Succ);
3590 connectBlocks(NewBlock, Succ);
3591 }
3592 connectBlocks(BlockPtr, NewBlock);
3593 }
3594
3595 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
3596 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
3597 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
3598 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
3599 /// and \p IfTrue and \p IfFalse must have neither successors nor
3600 /// predecessors.
3601 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
3602 VPBlockBase *BlockPtr) {
3603 assert(IfTrue->getSuccessors().empty() &&
3604 "Can't insert IfTrue with successors.");
3605 assert(IfFalse->getSuccessors().empty() &&
3606 "Can't insert IfFalse with successors.");
3607 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
3608 IfTrue->setPredecessors({BlockPtr});
3609 IfFalse->setPredecessors({BlockPtr});
3610 IfTrue->setParent(BlockPtr->getParent());
3611 IfFalse->setParent(BlockPtr->getParent());
3612 }
3613
3614 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
3615 /// the successors of \p From and \p From to the predecessors of \p To. Both
3616 /// VPBlockBases must have the same parent, which can be null. Both
3617 /// VPBlockBases can be already connected to other VPBlockBases.
3619 assert((From->getParent() == To->getParent()) &&
3620 "Can't connect two block with different parents");
3621 assert(From->getNumSuccessors() < 2 &&
3622 "Blocks can't have more than two successors.");
3623 From->appendSuccessor(To);
3624 To->appendPredecessor(From);
3625 }
3626
3627 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
3628 /// from the successors of \p From and \p From from the predecessors of \p To.
3630 assert(To && "Successor to disconnect is null.");
3631 From->removeSuccessor(To);
3632 To->removePredecessor(From);
3633 }
3634
3635 /// Return an iterator range over \p Range which only includes \p BlockTy
3636 /// blocks. The accesses are casted to \p BlockTy.
3637 template <typename BlockTy, typename T>
3638 static auto blocksOnly(const T &Range) {
3639 // Create BaseTy with correct const-ness based on BlockTy.
3640 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
3641 const VPBlockBase, VPBlockBase>;
3642
3643 // We need to first create an iterator range over (const) BlockTy & instead
3644 // of (const) BlockTy * for filter_range to work properly.
3645 auto Mapped =
3646 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
3648 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
3649 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
3650 return cast<BlockTy>(&Block);
3651 });
3652 }
3653};
3654
3657 InterleaveGroupMap;
3658
3659 /// Type for mapping of instruction based interleave groups to VPInstruction
3660 /// interleave groups
3663
3664 /// Recursively traverse \p Region and populate VPlan based interleave groups based on
3665 /// \p IAI.
3666 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
3668 /// Recursively traverse \p Block and populate VPlan based interleave groups
3669 /// based on \p IAI.
3670 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
3672
3673public:
3675
3678 // Avoid releasing a pointer twice.
3679 for (auto &I : InterleaveGroupMap)
3680 DelSet.insert(I.second);
3681 for (auto *Ptr : DelSet)
3682 delete Ptr;
3683 }
3684
3685 /// Get the interleave group that \p Instr belongs to.
3686 ///
3687 /// \returns nullptr if doesn't have such group.
3690 return InterleaveGroupMap.lookup(Instr);
3691 }
3692};
3693
3694/// Class that maps (parts of) an existing VPlan to trees of combined
3695/// VPInstructions.
3697 enum class OpMode { Failed, Load, Opcode };
3698
3699 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
3700 /// DenseMap keys.
3701 struct BundleDenseMapInfo {
3702 static SmallVector<VPValue *, 4> getEmptyKey() {
3703 return {reinterpret_cast<VPValue *>(-1)};
3704 }
3705
3706 static SmallVector<VPValue *, 4> getTombstoneKey() {
3707 return {reinterpret_cast<VPValue *>(-2)};
3708 }
3709
3710 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
3711 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
3712 }
3713
3714 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
3716 return LHS == RHS;
3717 }
3718 };
3719
3720 /// Mapping of values in the original VPlan to a combined VPInstruction.
3722 BundleToCombined;
3723
3725
3726 /// Basic block to operate on. For now, only instructions in a single BB are
3727 /// considered.
3728 const VPBasicBlock &BB;
3729
3730 /// Indicates whether we managed to combine all visited instructions or not.
3731 bool CompletelySLP = true;
3732
3733 /// Width of the widest combined bundle in bits.
3734 unsigned WidestBundleBits = 0;
3735
3736 using MultiNodeOpTy =
3737 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
3738
3739 // Input operand bundles for the current multi node. Each multi node operand
3740 // bundle contains values not matching the multi node's opcode. They will
3741 // be reordered in reorderMultiNodeOps, once we completed building a
3742 // multi node.
3743 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
3744
3745 /// Indicates whether we are building a multi node currently.
3746 bool MultiNodeActive = false;
3747
3748 /// Check if we can vectorize Operands together.
3749 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
3750
3751 /// Add combined instruction \p New for the bundle \p Operands.
3752 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
3753
3754 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
3755 VPInstruction *markFailed();
3756
3757 /// Reorder operands in the multi node to maximize sequential memory access
3758 /// and commutative operations.
3759 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
3760
3761 /// Choose the best candidate to use for the lane after \p Last. The set of
3762 /// candidates to choose from are values with an opcode matching \p Last's
3763 /// or loads consecutive to \p Last.
3764 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
3765 SmallPtrSetImpl<VPValue *> &Candidates,
3767
3768#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3769 /// Print bundle \p Values to dbgs().
3770 void dumpBundle(ArrayRef<VPValue *> Values);
3771#endif
3772
3773public:
3774 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
3775
3776 ~VPlanSlp() = default;
3777
3778 /// Tries to build an SLP tree rooted at \p Operands and returns a
3779 /// VPInstruction combining \p Operands, if they can be combined.
3781
3782 /// Return the width of the widest combined bundle in bits.
3783 unsigned getWidestBundleBits() const { return WidestBundleBits; }
3784
3785 /// Return true if all visited instructions can be combined.
3786 bool isCompletelySLP() const { return CompletelySLP; }
3787};
3788
3789namespace vputils {
3790
3791/// Returns true if only the first lane of \p Def is used.
3792bool onlyFirstLaneUsed(const VPValue *Def);
3793
3794/// Returns true if only the first part of \p Def is used.
3795bool onlyFirstPartUsed(const VPValue *Def);
3796
3797/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
3798/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
3799/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
3800/// pre-header already contains a recipe expanding \p Expr, return it. If not,
3801/// create a new one.
3803 ScalarEvolution &SE);
3804
3805/// Returns true if \p VPV is uniform after vectorization.
3807 // A value defined outside the vector region must be uniform after
3808 // vectorization inside a vector region.
3810 return true;
3811 VPRecipeBase *Def = VPV->getDefiningRecipe();
3812 assert(Def && "Must have definition for value defined inside vector region");
3813 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
3814 return Rep->isUniform();
3815 if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
3816 return all_of(GEP->operands(), isUniformAfterVectorization);
3817 if (auto *VPI = dyn_cast<VPInstruction>(Def))
3818 return VPI->isSingleScalar() || VPI->isVectorToScalar();
3819 return false;
3820}
3821
3822/// Return true if \p V is a header mask in \p Plan.
3823bool isHeaderMask(VPValue *V, VPlan &Plan);
3824} // end namespace vputils
3825
3826} // end namespace llvm
3827
3828#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:391
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:537
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1294
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This file implements a map that provides insertion order iteration.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:869
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:694
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:470
uint32_t getFactor() const
Definition: VectorUtils.h:486
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:540
InstTy * getInsertPos() const
Definition: VectorUtils.h:556
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:612
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:71
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:696
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:124
ElementCount operator*() const
Definition: VPlan.h:132
iterator & operator++()
Definition: VPlan.h:134
iterator(ElementCount VF)
Definition: VPlan.h:128
bool operator==(const iterator &Other) const
Definition: VPlan.h:130
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:2765
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2773
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2779
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:2767
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2969
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:2994
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3041
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3088
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:2996
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:2993
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:483
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3019
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:2977
iterator end()
Definition: VPlan.h:3003
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:2981
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3001
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:2995
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3054
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:780
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:212
~VPBasicBlock() override
Definition: VPlan.h:2987
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:575
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:527
const_reverse_iterator rbegin() const
Definition: VPlan.h:3007
reverse_iterator rend()
Definition: VPlan.h:3008
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:550
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:2975
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:537
VPRecipeBase & back()
Definition: VPlan.h:3016
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:642
const VPRecipeBase & front() const
Definition: VPlan.h:3013
const_iterator begin() const
Definition: VPlan.h:3002
VPRecipeBase & front()
Definition: VPlan.h:3014
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:625
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:613
const VPRecipeBase & back() const
Definition: VPlan.h:3015
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3032
bool empty() const
Definition: VPlan.h:3012
const_iterator end() const
Definition: VPlan.h:3004
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3027
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3022
reverse_iterator rbegin()
Definition: VPlan.h:3006
size_t size() const
Definition: VPlan.h:3011
const_reverse_iterator rend() const
Definition: VPlan.h:3009
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2023
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2028
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2066
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2046
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2051
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:2043
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2034
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:437
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:632
VPRegionBlock * getParent()
Definition: VPlan.h:509
VPBlocksTy & getPredecessors()
Definition: VPlan.h:540
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:177
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:690
void setName(const Twine &newName)
Definition: VPlan.h:502
size_t getNumSuccessors() const
Definition: VPlan.h:554
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:537
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:630
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:655
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:680
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:590
size_t getNumPredecessors() const
Definition: VPlan.h:555
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:623
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:199
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:539
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:207
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:494
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:150
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:169
const VPRegionBlock * getParent() const
Definition: VPlan.h:510
void printAsOperand(raw_ostream &OS, bool PrintType) const
Definition: VPlan.h:666
const std::string & getName() const
Definition: VPlan.h:500
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:642
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:580
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:614
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:550
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:574
void clearPredecessors()
Remove all the predecessor of this block.
Definition: VPlan.h:639
unsigned getVPBlockID() const
Definition: VPlan.h:507
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:486
VPBlocksTy & getSuccessors()
Definition: VPlan.h:535
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:191
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:155
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:603
void setParent(VPRegionBlock *P)
Definition: VPlan.h:520
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:596
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:544
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:534
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:3573
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:3638
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:3582
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:3601
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3629
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3618
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2359
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2391
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2379
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2361
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2367
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2398
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:2708
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:2749
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2723
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2715
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:2710
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2742
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2737
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:307
unsigned getVPDefID() const
Definition: VPlanValue.h:428
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:2862
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2877
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2907
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
Definition: VPlan.h:2870
Type * getScalarType() const
Definition: VPlan.h:2902
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2885
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2910
VPValue * getStartValue() const
Definition: VPlan.h:2906
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:2797
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2810
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2804
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:2799
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2819
Recipe to expand a SCEV expression.
Definition: VPlan.h:2676
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:2681
const SCEV * getSCEV() const
Definition: VPlan.h:2701
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2686
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1709
static bool classof(const VPValue *V)
Definition: VPlan.h:1726
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:1711
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1753
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1742
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:1750
VPValue * getStartValue() const
Definition: VPlan.h:1745
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1722
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:1759
~VPHeaderPHIRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3110
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:452
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:3114
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3136
~VPIRBasicBlock() override
Definition: VPlan.h:3119
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3121
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3129
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1229
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1247
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1235
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1250
@ CalculateTripCountMinusVF
Definition: VPlan.h:1248
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1304
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1334
bool hasResult() const
Definition: VPlan.h:1365
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1341
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1316
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1309
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1321
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: VPlan.h:1358
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2080
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2159
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2121
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2092
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2127
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2113
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2134
Instruction * getInsertPos() const
Definition: VPlan.h:2165
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2150
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2154
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:3689
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:156
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:196
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:231
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:70
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:178
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:212
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:182
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:215
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:202
static VPLane getFirstLane()
Definition: VPlan.h:180
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:159
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:218
A value that is used outside the VPlan.
Definition: VPlan.h:704
VPLiveOut(PHINode *Phi, VPValue *Op)
Definition: VPlan.h:708
static bool classof(const VPUser *U)
Definition: VPlan.h:711
bool usesScalars(const VPValue *Op) const override
Returns true if the VPLiveOut uses scalars of operand Op.
Definition: VPlan.h:722
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:728
void fixPhi(VPlan &Plan, VPTransformState &State)
Fix the wrapped phi node.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2410
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2434
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2414
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2418
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:764
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:855
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe using the legacy cost model and the underlying instructions.
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:789
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:860
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:831
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:775
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:790
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:836
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:780
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:844
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record the LLVM IR flags associated with a recipe.
Definition: VPlan.h:964
ExactFlagsTy ExactFlags
Definition: VPlan.h:1020
FastMathFlagsTy FMFs
Definition: VPlan.h:1023
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPFlagsTy GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1097
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:1022
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:1017
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1072
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1149
bool isInBounds() const
Definition: VPlan.h:1191
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1103
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1084
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1118
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1198
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:1042
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:1019
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1078
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1090
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:1028
WrapFlagsTy WrapFlags
Definition: VPlan.h:1018
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1202
bool isDisjoint() const
Definition: VPlan.h:1214
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1185
bool hasNoSignedWrap() const
Definition: VPlan.h:1208
static bool classof(const VPUser *U)
Definition: VPlan.h:1112
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:1035
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2245
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2275
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2272
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2256
VPReductionEVLRecipe(VPReductionRecipe *R, VPValue *EVL, VPValue *CondOp)
Definition: VPlan.h:2247
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:1964
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:1977
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2015
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1987
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2018
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1997
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2010
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2171
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2230
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2204
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2234
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2224
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2190
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2236
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2179
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2228
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2232
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2199
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2209
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3147
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:713
const VPBlockBase * getEntry() const
Definition: VPlan.h:3186
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3218
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replace all uses of VPValues def...
Definition: VPlan.cpp:722
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3203
VPBlockBase * getExiting()
Definition: VPlan.h:3199
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3191
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:787
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:823
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3169
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3160
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:729
const VPBlockBase * getExiting() const
Definition: VPlan.h:3198
VPBlockBase * getEntry()
Definition: VPlan.h:3187
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3211
~VPRegionBlock() override
Definition: VPlan.h:3173
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3182
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2286
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2331
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2338
bool isUniform() const
Definition: VPlan.h:2326
bool isPredicated() const
Definition: VPlan.h:2328
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2305
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2295
unsigned getOpcode() const
Definition: VPlan.h:2355
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2350
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1485
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1499
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1515
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1513
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1493
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:2919
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2959
VPValue * getStepValue() const
Definition: VPlan.h:2956
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2929
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2939
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:2923
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDefRecipe is a base class for recipes modeling a sequence of one or more output IR instructions that define ...
Definition: VPlan.h:891
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:897
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:955
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:906
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:958
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:894
static bool classof(const VPUser *U)
Definition: VPlan.h:947
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:902
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:449
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:39
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:202
operand_range operands()
Definition: VPlanValue.h:272
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:251
operand_iterator op_begin()
Definition: VPlanValue.h:268
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:252
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:246
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:120
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:132
bool isDefinedOutsideVectorRegions() const
Returns true if the VPValue is defined outside any vector regions, i.e.
Definition: VPlanValue.h:186
A recipe to compute the pointers for widened memory accesses of IndexedTy for all parts.
Definition: VPlan.h:1653
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, bool IsInBounds, DebugLoc DL)
Definition: VPlan.h:1658
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1668
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1674
A recipe for widening Call instructions.
Definition: VPlan.h:1524
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1565
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1548
VPWidenCallRecipe(Value *UV, iterator_range< IterT > CallArguments, Intrinsic::ID VectorIntrinsicID, DebugLoc DL={}, Function *Variant=nullptr)
Definition: VPlan.h:1536
Function * getCalledScalarFunction() const
Definition: VPlan.h:1558
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1562
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:2833
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2840
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:2835
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1437
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1445
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1478
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1481
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1453
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1459
A recipe for handling GEP instructions.
Definition: VPlan.h:1611
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1633
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1628
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:1766
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc)
Definition: VPlan.h:1779
const TruncInst * getTruncInst() const
Definition: VPlan.h:1827
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:1813
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1789
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1826
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1821
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc)
Definition: VPlan.h:1772
const VPValue * getStepValue() const
Definition: VPlan.h:1822
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1840
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1806
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1832
A common base class for widening memory operations.
Definition: VPlan.h:2443
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2454
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2451
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2490
static bool classof(const VPUser *U)
Definition: VPlan.h:2484
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2510
Instruction & Ingredient
Definition: VPlan.h:2445
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2473
Instruction & getIngredient() const
Definition: VPlan.h:2514
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2448
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2477
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2464
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2504
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2500
void setMask(VPValue *Mask)
Definition: VPlan.h:2456
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2497
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2494
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:1892
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:1922
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:1931
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:1898
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1904
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:1928
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1865
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1880
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:1853
VPWidenRecipe is a recipe for producing a vector-typed copy of its ingredient.
Definition: VPlan.h:1405
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1416
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1410
unsigned getOpcode() const
Definition: VPlan.h:1427
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:3502
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:3543
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1282
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:3696
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:3786
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:3774
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:3783
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3251
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1173
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1149
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:916
bool hasScalableVF()
Definition: VPlan.h:3392
VPBasicBlock * getEntry()
Definition: VPlan.h:3353
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3378
void setName(const Twine &newName)
Definition: VPlan.h:3415
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3381
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3357
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3371
void removeLiveOut(PHINode *PN)
Definition: VPlan.h:3472
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3398
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:1182
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3354
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader, trip count TC and Entry to the plan.
Definition: VPlan.h:3311
VPBasicBlock * getPreheader()
Definition: VPlan.h:3491
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3453
const VPRegionBlock * getVectorLoopRegion() const
Definition: VPlan.h:3456
bool hasVF(ElementCount VF)
Definition: VPlan.h:3391
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:3485
bool hasUF(unsigned UF) const
Definition: VPlan.h:3404
void setVF(ElementCount VF)
Definition: VPlan.h:3385
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1086
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3364
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which con...
Definition: VPlan.cpp:858
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader and Entry to the plan.
Definition: VPlan.h:3320
const VPBasicBlock * getPreheader() const
Definition: VPlan.h:3492
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3419
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1179
bool hasScalarVFOnly() const
Definition: VPlan.h:3402
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:976
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:3461
const MapVector< PHINode *, VPLiveOut * > & getLiveOuts() const
Definition: VPlan.h:3477
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1123
void addVF(ElementCount VF)
Definition: VPlan.h:3383
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3436
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:3481
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1093
void setUF(unsigned UF)
Definition: VPlan.h:3406
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1225
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1610
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3806
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1605
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1600
bool isHeaderMask(VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
Definition: VPlan.cpp:1627
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
const SCEV * createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *OrigLoop)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:147
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:572
@ Other
Any other memory.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:95
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:471
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:100
iterator end()
Definition: VPlan.h:141
const ElementCount Start
Definition: VPlan.h:102
ElementCount End
Definition: VPlan.h:105
iterator begin()
Definition: VPlan.h:140
bool isEmpty() const
Definition: VPlan.h:107
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:111
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:737
LLVMContext & LLVMCtx
Definition: VPlan.h:740
LoopVectorizationCostModel & CM
Definition: VPlan.h:741
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy, LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
Definition: VPlan.h:744
VPTypeAnalysis Types
Definition: VPlan.h:739
const TargetTransformInfo & TTI
Definition: VPlan.h:738
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:742
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:1937
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1947
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:1938
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1943
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:238
VPIteration(unsigned Part, const VPLane &Lane)
Definition: VPlan.h:248
unsigned Part
in [0..UF)
Definition: VPlan.h:240
VPLane Lane
Definition: VPlan.h:242
VPIteration(unsigned Part, unsigned Lane, VPLane::Kind Kind=VPLane::Kind::First)
Definition: VPlan.h:244
bool isFirstIteration() const
Definition: VPlan.h:250
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:981
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:378
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:384
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:392
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:380
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:388
CFGState(DominatorTree *DT)
Definition: VPlan.h:397
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:356
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:395
SmallVector< Value *, 2 > PerPartValuesTy
A type for vectorized values in the new loop.
Definition: VPlan.h:273
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:278
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:275
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:255
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:254
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:406
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:429
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:432
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:369
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:322
struct llvm::VPTransformState::CFGState CFG
void reset(VPValue *Def, Value *V, const VPIteration &Instance)
Reset an existing scalar value for Def and a given Instance.
Definition: VPlan.h:344
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:425
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:361
void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance)
Construct the vector value of a scalarized value Def one lane at a time.
Definition: VPlan.cpp:401
void set(VPValue *Def, Value *V, const VPIteration &Instance)
Set the generated scalar V for Def and the given Instance.
Definition: VPlan.h:330
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:307
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:267
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:409
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:295
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:415
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:412
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:289
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:261
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:418
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:380
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:2558
void execute(VPTransformState &State) override
Generate the wide load or gather.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2570
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2559
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2582
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2519
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2520
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2546
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2528
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1577
bool isInvariantCond() const
Definition: VPlan.h:1605
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1585
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1579
VPValue * getCond() const
Definition: VPlan.h:1601
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:2634
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2646
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2635
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2661
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2649
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:2593
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2622
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2594
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2610
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2601
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:3552
const Value * V
Definition: VPlan.h:3550
void print(raw_ostream &O) const
Definition: VPlan.cpp:1400