LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/Twine.h"
36#include "llvm/ADT/ilist.h"
37#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
86const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
87 Loop *CurLoop = nullptr);
88
89/// A helper function that returns the reciprocal of the block probability of
90/// predicated blocks. If we return X, we are assuming the predicated block
91/// will execute once for every X iterations of the loop header.
92///
93/// TODO: We should use actual block probability here, if available. Currently,
94/// we always assume predicated blocks have a 50% chance of executing.
///
/// \returns the constant 2, the reciprocal of the assumed 50% probability.
95inline unsigned getReciprocalPredBlockProb() { return 2; }
96
97/// A range of powers-of-2 vectorization factors with fixed start and
98/// adjustable end. The range includes start and excludes end, e.g.,:
99/// [1, 16) = {1, 2, 4, 8}
100struct VFRange {
101 // A power of 2.
103
104 // A power of 2. If End <= Start range is empty.
106
107 bool isEmpty() const {
109 }
110
112 : Start(Start), End(End) {
114 "Both Start and End should have the same scalable flag");
116 "Expected Start to be a power of 2");
118 "Expected End to be a power of 2");
119 }
120
121 /// Iterator to iterate over vectorization factors in a VFRange.
123 : public iterator_facade_base<iterator, std::forward_iterator_tag,
124 ElementCount> {
125 ElementCount VF;
126
127 public:
128 iterator(ElementCount VF) : VF(VF) {}
129
130 bool operator==(const iterator &Other) const { return VF == Other.VF; }
131
132 ElementCount operator*() const { return VF; }
133
135 VF *= 2;
136 return *this;
137 }
138 };
139
143 return iterator(End);
144 }
145};
146
147using VPlanPtr = std::unique_ptr<VPlan>;
148
149/// In what follows, the term "input IR" refers to code that is fed into the
150/// vectorizer whereas the term "output IR" refers to code that is generated by
151/// the vectorizer.
152
153/// VPLane provides a way to access lanes in both fixed width and scalable
154/// vectors, where for the latter the lane index sometimes needs calculating
155/// as a runtime expression.
156class VPLane {
157public:
158 /// Kind describes how to interpret Lane.
159 enum class Kind : uint8_t {
160 /// For First, Lane is the index into the first N elements of a
161 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
162 First,
163 /// For ScalableLast, Lane is the offset from the start of the last
164 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
165 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
166 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
168 };
169
170private:
171 /// in [0..VF)
172 unsigned Lane;
173
174 /// Indicates how the Lane should be interpreted, as described above.
175 Kind LaneKind;
176
177public:
178 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
179
181
/// Returns the lane located \p Offset lanes before the end of a vector with
/// \p VF elements. \p Offset must be in [1, VF.getKnownMinValue()]. For a
/// fixed-width \p VF the result is lane (VF - Offset) of kind First.
182 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
183 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
184 "trying to extract with invalid offset");
185 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
186 Kind LaneKind;
187 if (VF.isScalable())
188 // In this case 'LaneOffset' refers to the offset from the start of the
189 // last subvector with VF.getKnownMinValue() elements.
191 else
192 LaneKind = VPLane::Kind::First;
193 return VPLane(LaneOffset, LaneKind);
194 }
195
197 return getLaneFromEnd(VF, 1);
198 }
199
200 /// Returns a compile-time known value for the lane index and asserts if the
201 /// lane can only be calculated at runtime.
202 unsigned getKnownLane() const {
203 assert(LaneKind == Kind::First);
204 return Lane;
205 }
206
207 /// Returns an expression describing the lane index that can be used at
208 /// runtime.
209 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
210
211 /// Returns the Kind of lane offset.
212 Kind getKind() const { return LaneKind; }
213
214 /// Returns true if this is the first lane of the whole vector.
215 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
216
217 /// Maps the lane to a cache index based on \p VF.
218 unsigned mapToCacheIndex(const ElementCount &VF) const {
219 switch (LaneKind) {
221 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
222 return VF.getKnownMinValue() + Lane;
223 default:
224 assert(Lane < VF.getKnownMinValue());
225 return Lane;
226 }
227 }
228
229 /// Returns the maximum number of lanes that we are able to consider
230 /// caching for \p VF.
 /// This is VF.getKnownMinValue() for fixed-width VFs and twice that value for
 /// scalable VFs.
231 static unsigned getNumCachedLanes(const ElementCount &VF) {
232 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
233 }
234};
235
236/// VPIteration represents a single point in the iteration space of the output
237/// (vectorized and/or unrolled) IR loop.
239 /// in [0..UF)
240 unsigned Part;
241
243
244 VPIteration(unsigned Part, unsigned Lane,
246 : Part(Part), Lane(Lane, Kind) {}
247
248 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
249
250 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
251};
252
253/// VPTransformState holds information passed down when "executing" a VPlan,
254/// needed for generating the output IR.
259
260 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
262 unsigned UF;
263
264 /// Hold the indices to generate specific scalar instructions. Null indicates
265 /// that all instances are to be generated, using either scalar or vector
266 /// instructions.
267 std::optional<VPIteration> Instance;
268
269 struct DataState {
270 /// A type for vectorized values in the new loop. Each value from the
271 /// original loop, when vectorized, is represented by UF vector values in
272 /// the new unrolled loop, where UF is the unroll factor.
274
276
280
281 /// Get the generated vector Value for a given VPValue \p Def and a given \p
282 /// Part if \p IsScalar is false, otherwise return the generated scalar
283 /// for \p Part. \see set.
284 Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
285
286 /// Get the generated Value for a given VPValue and given Part and Lane.
287 Value *get(VPValue *Def, const VPIteration &Instance);
288
/// Returns true if Data.PerPartOutput holds a non-null vector Value for
/// \p Def at index \p Part.
289 bool hasVectorValue(VPValue *Def, unsigned Part) {
290 auto I = Data.PerPartOutput.find(Def);
291 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
292 I->second[Part];
293 }
294
296 auto I = Data.PerPartScalars.find(Def);
297 if (I == Data.PerPartScalars.end())
298 return false;
299 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
300 return Instance.Part < I->second.size() &&
301 CacheIdx < I->second[Instance.Part].size() &&
302 I->second[Instance.Part][CacheIdx];
303 }
304
305 /// Set the generated vector Value for a given VPValue and a given Part, if \p
306 /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
307 void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
308 if (IsScalar) {
309 set(Def, V, VPIteration(Part, 0));
310 return;
311 }
312 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
313 "scalar values must be stored as (Part, 0)");
314 if (!Data.PerPartOutput.count(Def)) {
316 Data.PerPartOutput[Def] = Entry;
317 }
318 Data.PerPartOutput[Def][Part] = V;
319 }
320
321 /// Reset an existing vector value for \p Def and a given \p Part.
322 void reset(VPValue *Def, Value *V, unsigned Part) {
323 auto Iter = Data.PerPartOutput.find(Def);
324 assert(Iter != Data.PerPartOutput.end() &&
325 "need to overwrite existing value");
326 Iter->second[Part] = V;
327 }
328
329 /// Set the generated scalar \p V for \p Def and the given \p Instance.
 /// The per-part and per-lane storage in Data.PerPartScalars is grown on
 /// demand. The addressed slot must still be empty; use reset to replace an
 /// already recorded value.
330 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
 // Presumably a map-style insert that reuses the entry for Def if one exists.
331 auto Iter = Data.PerPartScalars.insert({Def, {}});
332 auto &PerPartVec = Iter.first->second;
 // Make sure there is a lane container for Instance.Part.
333 if (PerPartVec.size() <= Instance.Part)
334 PerPartVec.resize(Instance.Part + 1);
335 auto &Scalars = PerPartVec[Instance.Part];
336 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
 // Make sure the lane container reaches CacheIdx.
337 if (Scalars.size() <= CacheIdx)
338 Scalars.resize(CacheIdx + 1);
 // NOTE(review): the condition requires the slot to be empty, while the
 // message reads as if an existing value were expected -- confirm the wording.
339 assert(!Scalars[CacheIdx] && "should overwrite existing value");
340 Scalars[CacheIdx] = V;
341 }
342
343 /// Reset an existing scalar value for \p Def and a given \p Instance.
344 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
345 auto Iter = Data.PerPartScalars.find(Def);
346 assert(Iter != Data.PerPartScalars.end() &&
347 "need to overwrite existing value");
348 assert(Instance.Part < Iter->second.size() &&
349 "need to overwrite existing value");
350 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
351 assert(CacheIdx < Iter->second[Instance.Part].size() &&
352 "need to overwrite existing value");
353 Iter->second[Instance.Part][CacheIdx] = V;
354 }
355
356 /// Add additional metadata to \p To that was not present on \p Orig.
357 ///
358 /// Currently this is used to add the noalias annotations based on the
359 /// inserted memchecks. Use this for instructions that are *cloned* into the
360 /// vector loop.
361 void addNewMetadata(Instruction *To, const Instruction *Orig);
362
363 /// Add metadata from one instruction to another.
364 ///
365 /// This includes both the original MDs from \p From and additional ones (\see
366 /// addNewMetadata). Use this for *newly created* instructions in the vector
367 /// loop.
368 void addMetadata(Value *To, Instruction *From);
369
370 /// Set the debug location in the builder using the debug location \p DL.
372
373 /// Construct the vector value of a scalarized value \p V one lane at a time.
375
376 /// Hold state information used when constructing the CFG of the output IR,
377 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
378 struct CFGState {
379 /// The previous VPBasicBlock visited. Initially set to null.
381
382 /// The previous IR BasicBlock created or used. Initially set to the new
383 /// header BasicBlock.
384 BasicBlock *PrevBB = nullptr;
385
386 /// The last IR BasicBlock in the output IR. Set to the exit block of the
387 /// vector loop.
388 BasicBlock *ExitBB = nullptr;
389
390 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
391 /// of replication, maps the BasicBlock of the last replica created.
393
394 /// Updater for the DominatorTree.
396
398 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
399
400 /// Returns the BasicBlock* mapped to the pre-header of the loop region
401 /// containing \p R.
404
405 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
407
408 /// Hold a reference to the IRBuilder used to generate output IR code.
410
411 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
413
414 /// Pointer to the VPlan for which code is generated.
416
417 /// The loop object for the current parent region, or nullptr.
419
420 /// LoopVersioning. It's only set up (non-null) if memchecks were
421 /// used.
422 ///
423 /// This is currently only used to add no-alias metadata based on the
424 /// memchecks. The actual versioning is performed manually.
426
427 /// Map SCEVs to their expanded values. Populated when executing
428 /// VPExpandSCEVRecipes.
430
431 /// VPlan-based type analysis.
433};
434
435/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
436/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
438 friend class VPBlockUtils;
439
440 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
441
442 /// An optional name for the block.
443 std::string Name;
444
445 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
446 /// it is a topmost VPBlockBase.
447 VPRegionBlock *Parent = nullptr;
448
449 /// List of predecessor blocks.
451
452 /// List of successor blocks.
454
455 /// VPlan containing the block. Can only be set on the entry block of the
456 /// plan.
457 VPlan *Plan = nullptr;
458
459 /// Add \p Successor as the last successor to this block.
460 void appendSuccessor(VPBlockBase *Successor) {
461 assert(Successor && "Cannot add nullptr successor!");
462 Successors.push_back(Successor);
463 }
464
465 /// Add \p Predecessor as the last predecessor to this block.
466 void appendPredecessor(VPBlockBase *Predecessor) {
467 assert(Predecessor && "Cannot add nullptr predecessor!");
468 Predecessors.push_back(Predecessor);
469 }
470
471 /// Remove \p Predecessor from the predecessors of this block.
 /// \p Predecessor must currently be in the predecessor list.
472 void removePredecessor(VPBlockBase *Predecessor) {
473 auto Pos = find(Predecessors, Predecessor);
 // A failed search yields the end iterator, which is not null, so compare
 // against end() instead of testing Pos for truthiness.
474 assert(Pos != Predecessors.end() && "Predecessor does not exist");
475 Predecessors.erase(Pos);
476 }
477
478 /// Remove \p Successor from the successors of this block.
 /// \p Successor must currently be in the successor list.
479 void removeSuccessor(VPBlockBase *Successor) {
480 auto Pos = find(Successors, Successor);
 // A failed search yields the end iterator, which is not null, so compare
 // against end() instead of testing Pos for truthiness.
481 assert(Pos != Successors.end() && "Successor does not exist");
482 Successors.erase(Pos);
483 }
484
485protected:
486 VPBlockBase(const unsigned char SC, const std::string &N)
487 : SubclassID(SC), Name(N) {}
488
489public:
490 /// An enumeration for keeping track of the concrete subclasses of VPBlockBase
491 /// that are actually instantiated. Values of this enumeration are kept in the
492 /// SubclassID field of the VPBlockBase objects. They are used for concrete
493 /// type identification.
494 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
495
497
498 virtual ~VPBlockBase() = default;
499
500 const std::string &getName() const { return Name; }
501
502 void setName(const Twine &newName) { Name = newName.str(); }
503
504 /// \return an ID for the concrete type of this object.
505 /// This is used to implement the classof checks. This should not be used
506 /// for any other purpose, as the values may change as LLVM evolves.
507 unsigned getVPBlockID() const { return SubclassID; }
508
509 VPRegionBlock *getParent() { return Parent; }
510 const VPRegionBlock *getParent() const { return Parent; }
511
512 /// \return A pointer to the plan containing the current block.
513 VPlan *getPlan();
514 const VPlan *getPlan() const;
515
516 /// Sets the pointer of the plan containing the block. The block must be the
517 /// entry block into the VPlan.
518 void setPlan(VPlan *ParentPlan);
519
520 void setParent(VPRegionBlock *P) { Parent = P; }
521
522 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
523 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
524 /// VPBlockBase is a VPBasicBlock, it is returned.
525 const VPBasicBlock *getEntryBasicBlock() const;
527
528 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
529 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
530 /// VPBlockBase is a VPBasicBlock, it is returned.
531 const VPBasicBlock *getExitingBasicBlock() const;
533
534 const VPBlocksTy &getSuccessors() const { return Successors; }
535 VPBlocksTy &getSuccessors() { return Successors; }
536
538
539 const VPBlocksTy &getPredecessors() const { return Predecessors; }
540 VPBlocksTy &getPredecessors() { return Predecessors; }
541
542 /// \return the successor of this VPBlockBase if it has a single successor.
543 /// Otherwise return a null pointer.
545 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
546 }
547
548 /// \return the predecessor of this VPBlockBase if it has a single
549 /// predecessor. Otherwise return a null pointer.
551 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
552 }
553
554 size_t getNumSuccessors() const { return Successors.size(); }
555 size_t getNumPredecessors() const { return Predecessors.size(); }
556
557 /// An Enclosing Block of a block B is any block containing B, including B
558 /// itself. \return the closest enclosing block starting from "this", which
559 /// has successors. \return the root enclosing block if all enclosing blocks
560 /// have no successors.
562
563 /// \return the closest enclosing block starting from "this", which has
564 /// predecessors. \return the root enclosing block if all enclosing blocks
565 /// have no predecessors.
567
568 /// \return the successors either attached directly to this VPBlockBase or, if
569 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
570 /// successors of its own, search recursively for the first enclosing
571 /// VPRegionBlock that has successors and return them. If no such
572 /// VPRegionBlock exists, return the (empty) successors of the topmost
573 /// VPBlockBase reached.
576 }
577
578 /// \return the hierarchical successor of this VPBlockBase if it has a single
579 /// hierarchical successor. Otherwise return a null pointer.
582 }
583
584 /// \return the predecessors either attached directly to this VPBlockBase or,
585 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
586 /// predecessors of its own, search recursively for the first enclosing
587 /// VPRegionBlock that has predecessors and return them. If no such
588 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
589 /// VPBlockBase reached.
592 }
593
594 /// \return the hierarchical predecessor of this VPBlockBase if it has a
595 /// single hierarchical predecessor. Otherwise return a null pointer.
598 }
599
600 /// Set a given VPBlockBase \p Successor as the single successor of this
601 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
602 /// This VPBlockBase must have no successors.
604 assert(Successors.empty() && "Setting one successor when others exist.");
605 assert(Successor->getParent() == getParent() &&
606 "connected blocks must have the same parent");
607 appendSuccessor(Successor);
608 }
609
610 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
611 /// successors of this VPBlockBase. This VPBlockBase is not added as
612 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
613 /// successors.
614 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
615 assert(Successors.empty() && "Setting two successors when others exist.");
616 appendSuccessor(IfTrue);
617 appendSuccessor(IfFalse);
618 }
619
620 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
621 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
622 /// as successor of any VPBasicBlock in \p NewPreds.
624 assert(Predecessors.empty() && "Block predecessors already set.");
625 for (auto *Pred : NewPreds)
626 appendPredecessor(Pred);
627 }
628
629 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
630 /// This VPBlockBase must have no successors. This VPBlockBase is not added
631 /// as predecessor of any VPBasicBlock in \p NewSuccs.
633 assert(Successors.empty() && "Block successors already set.");
634 for (auto *Succ : NewSuccs)
635 appendSuccessor(Succ);
636 }
637
638 /// Remove all the predecessors of this block.
639 void clearPredecessors() { Predecessors.clear(); }
640
641 /// Remove all the successors of this block.
642 void clearSuccessors() { Successors.clear(); }
643
644 /// The method which generates the output IR that corresponds to this
645 /// VPBlockBase, thereby "executing" the VPlan.
646 virtual void execute(VPTransformState *State) = 0;
647
648 /// Return the cost of the block.
650
651 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
652 static void deleteCFG(VPBlockBase *Entry);
653
654 /// Return true if it is legal to hoist instructions into this block.
656 // There are currently no constraints that prevent an instruction from being
657 // hoisted into a VPBlockBase.
658 return true;
659 }
660
661 /// Replace all operands of VPUsers in the block with \p NewValue and also
662 /// replaces all uses of VPValues defined in the block with NewValue.
663 virtual void dropAllReferences(VPValue *NewValue) = 0;
664
665#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
666 void printAsOperand(raw_ostream &OS, bool PrintType) const {
667 OS << getName();
668 }
669
670 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
671 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
672 /// consecutive numbers.
673 ///
674 /// Note that the numbering is applied to the whole VPlan, so printing
675 /// individual blocks is consistent with the whole VPlan printing.
676 virtual void print(raw_ostream &O, const Twine &Indent,
677 VPSlotTracker &SlotTracker) const = 0;
678
679 /// Print plain-text dump of this VPBlockBase to \p O.
680 void print(raw_ostream &O) const {
682 print(O, "", SlotTracker);
683 }
684
685 /// Print the successors of this block to \p O, prefixing all lines with \p
686 /// Indent.
687 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
688
689 /// Dump this VPBlockBase to dbgs().
690 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
691#endif
692
693 /// Clone the current block and its recipes without updating the operands of
694 /// the cloned recipes, including all blocks in the single-entry single-exit
695 /// region for VPRegionBlocks.
696 virtual VPBlockBase *clone() = 0;
697};
698
699/// A value that is used outside the VPlan. The operand of the user needs to be
700/// added to the associated phi node. The incoming block from VPlan is
701/// determined by where the VPValue is defined: if it is defined by a recipe
702/// outside a region, its parent block is used, otherwise the middle block is
703/// used.
704class VPLiveOut : public VPUser {
705 PHINode *Phi;
706
707public:
709 : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
710
711 static inline bool classof(const VPUser *U) {
712 return U->getVPUserID() == VPUser::VPUserID::LiveOut;
713 }
714
715 /// Fix the wrapped phi node. This means adding an incoming value to exit
716 /// block phi's from the vector loop via middle block (values from scalar loop
717 /// already reach these phi's), and updating the value to scalar header phi's
718 /// from the scalar preheader.
719 void fixPhi(VPlan &Plan, VPTransformState &State);
720
721 /// Returns true if the VPLiveOut uses scalars of operand \p Op.
722 bool usesScalars(const VPValue *Op) const override {
724 "Op must be an operand of the recipe");
725 return true;
726 }
727
728 PHINode *getPhi() const { return Phi; }
729
730#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
731 /// Print the VPLiveOut to \p O.
733#endif
734};
735
736/// Struct to hold various analyses needed for cost computations.
744
746 Type *CanIVTy, LLVMContext &LLVMCtx,
748 : TTI(TTI), TLI(TLI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {}
749
750 /// Return the cost for \p UI with \p VF using the legacy cost model as
751 /// fallback until computing the cost of all recipes migrates to VPlan.
753
754 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
755 /// has already been pre-computed.
756 bool skipCostComputation(Instruction *UI, bool IsVector) const;
757};
758
759/// VPRecipeBase is a base class modeling a sequence of one or more output IR
760/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
761/// and is responsible for deleting its defined values. Single-value
762/// recipes must inherit from VPSingleDef instead of inheriting from both
763/// VPRecipeBase and VPValue separately.
764class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
765 public VPDef,
766 public VPUser {
767 friend VPBasicBlock;
768 friend class VPBlockUtils;
769
770 /// Each VPRecipe belongs to a single VPBasicBlock.
771 VPBasicBlock *Parent = nullptr;
772
773 /// The debug location for the recipe.
774 DebugLoc DL;
775
776public:
778 DebugLoc DL = {})
780
781 template <typename IterT>
783 DebugLoc DL = {})
785 virtual ~VPRecipeBase() = default;
786
787 /// Clone the current recipe.
788 virtual VPRecipeBase *clone() = 0;
789
790 /// \return the VPBasicBlock which this VPRecipe belongs to.
791 VPBasicBlock *getParent() { return Parent; }
792 const VPBasicBlock *getParent() const { return Parent; }
793
794 /// The method which generates the output IR instructions that correspond to
795 /// this VPRecipe, thereby "executing" the VPlan.
796 virtual void execute(VPTransformState &State) = 0;
797
798 /// Return the cost of this recipe, taking into account if the cost
799 /// computation should be skipped and the ForceTargetInstructionCost flag.
800 /// Also takes care of printing the cost for debugging.
802
803 /// Insert an unlinked recipe into a basic block immediately before
804 /// the specified recipe.
805 void insertBefore(VPRecipeBase *InsertPos);
806 /// Insert an unlinked recipe into \p BB immediately before the insertion
807 /// point \p IP;
809
810 /// Insert an unlinked Recipe into a basic block immediately after
811 /// the specified Recipe.
812 void insertAfter(VPRecipeBase *InsertPos);
813
814 /// Unlink this recipe from its current VPBasicBlock and insert it into
815 /// the VPBasicBlock that MovePos lives in, right after MovePos.
816 void moveAfter(VPRecipeBase *MovePos);
817
818 /// Unlink this recipe and insert into BB before I.
819 ///
820 /// \pre I is a valid iterator into BB.
822
823 /// This method unlinks 'this' from the containing basic block, but does not
824 /// delete it.
825 void removeFromParent();
826
827 /// This method unlinks 'this' from the containing basic block and deletes it.
828 ///
829 /// \returns an iterator pointing to the element after the erased one
831
832 /// Method to support type inquiry through isa, cast, and dyn_cast.
833 static inline bool classof(const VPDef *D) {
834 // All VPDefs are also VPRecipeBases.
835 return true;
836 }
837
838 static inline bool classof(const VPUser *U) {
839 return U->getVPUserID() == VPUser::VPUserID::Recipe;
840 }
841
842 /// Returns true if the recipe may have side-effects.
843 bool mayHaveSideEffects() const;
844
845 /// Returns true for PHI-like recipes.
846 bool isPhi() const {
847 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
848 }
849
850 /// Returns true if the recipe may read from memory.
851 bool mayReadFromMemory() const;
852
853 /// Returns true if the recipe may write to memory.
854 bool mayWriteToMemory() const;
855
856 /// Returns true if the recipe may read from or write to memory.
857 bool mayReadOrWriteMemory() const {
859 }
860
861 /// Returns the debug location of the recipe.
862 DebugLoc getDebugLoc() const { return DL; }
863
864protected:
865 /// Compute the cost of this recipe either using a recipe's specialized
866 /// implementation or using the legacy cost model and the underlying
867 /// instructions.
869 VPCostContext &Ctx) const;
870};
871
872// Helper macro to define common classof implementations for recipes.
// Expands to static classof overloads taking a VPDef, VPValue, VPUser,
// VPRecipeBase or VPSingleDefRecipe pointer. Each returns true when the
// getVPDefID() of the (defining) recipe equals VPDefID. The VPValue and
// VPUser overloads return false when no recipe can be obtained from the
// argument.
873#define VP_CLASSOF_IMPL(VPDefID) \
874 static inline bool classof(const VPDef *D) { \
875 return D->getVPDefID() == VPDefID; \
876 } \
877 static inline bool classof(const VPValue *V) { \
878 auto *R = V->getDefiningRecipe(); \
879 return R && R->getVPDefID() == VPDefID; \
880 } \
881 static inline bool classof(const VPUser *U) { \
882 auto *R = dyn_cast<VPRecipeBase>(U); \
883 return R && R->getVPDefID() == VPDefID; \
884 } \
885 static inline bool classof(const VPRecipeBase *R) { \
886 return R->getVPDefID() == VPDefID; \
887 } \
888 static inline bool classof(const VPSingleDefRecipe *R) { \
889 return R->getVPDefID() == VPDefID; \
890 }
891
892/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
893/// more output IR instructions that define a single result VPValue.
894/// Note that VPRecipeBase must be inherited from before VPValue.
895class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
896public:
 /// Construct from the recipe ID \p SC, an operand range \p Operands and an
 /// optional debug location \p DL. The VPValue base is constructed from
 /// `this` only, i.e. without an underlying IR value.
897 template <typename IterT>
898 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
899 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
900
 /// Same as above, but takes the operands as an ArrayRef instead of a
 /// templated range.
901 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
902 DebugLoc DL = {})
903 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
904
 /// Construct from \p SC, \p Operands and \p DL, additionally forwarding the
 /// IR value \p UV to the VPValue base.
905 template <typename IterT>
906 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
907 DebugLoc DL = {})
908 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
909
 /// Type inquiry on a recipe: returns true for the recipe IDs in the first
 /// case group and false for those in the second group. Falling out of the
 /// switch hits llvm_unreachable.
910 static inline bool classof(const VPRecipeBase *R) {
911 switch (R->getVPDefID()) {
912 case VPRecipeBase::VPDerivedIVSC:
913 case VPRecipeBase::VPEVLBasedIVPHISC:
914 case VPRecipeBase::VPExpandSCEVSC:
915 case VPRecipeBase::VPInstructionSC:
916 case VPRecipeBase::VPReductionEVLSC:
917 case VPRecipeBase::VPReductionSC:
918 case VPRecipeBase::VPReplicateSC:
919 case VPRecipeBase::VPScalarIVStepsSC:
920 case VPRecipeBase::VPVectorPointerSC:
921 case VPRecipeBase::VPWidenCallSC:
922 case VPRecipeBase::VPWidenCanonicalIVSC:
923 case VPRecipeBase::VPWidenCastSC:
924 case VPRecipeBase::VPWidenGEPSC:
925 case VPRecipeBase::VPWidenSC:
926 case VPRecipeBase::VPWidenSelectSC:
927 case VPRecipeBase::VPBlendSC:
928 case VPRecipeBase::VPPredInstPHISC:
929 case VPRecipeBase::VPCanonicalIVPHISC:
930 case VPRecipeBase::VPActiveLaneMaskPHISC:
931 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
932 case VPRecipeBase::VPWidenPHISC:
933 case VPRecipeBase::VPWidenIntOrFpInductionSC:
934 case VPRecipeBase::VPWidenPointerInductionSC:
935 case VPRecipeBase::VPReductionPHISC:
936 case VPRecipeBase::VPScalarCastSC:
937 return true;
938 case VPRecipeBase::VPInterleaveSC:
939 case VPRecipeBase::VPBranchOnMaskSC:
940 case VPRecipeBase::VPWidenLoadEVLSC:
941 case VPRecipeBase::VPWidenLoadSC:
942 case VPRecipeBase::VPWidenStoreEVLSC:
943 case VPRecipeBase::VPWidenStoreSC:
944 // TODO: Widened stores don't define a value, but widened loads do. Split
945 // the recipes to be able to make widened loads VPSingleDefRecipes.
946 return false;
947 }
948 llvm_unreachable("Unhandled VPDefID");
949 }
950
 /// Type inquiry on a user: true only if \p U is a VPRecipeBase that also
 /// passes the recipe-based check.
951 static inline bool classof(const VPUser *U) {
952 auto *R = dyn_cast<VPRecipeBase>(U);
953 return R && classof(R);
954 }
955
 /// Pure virtual; overrides clone() with a VPSingleDefRecipe * return type.
956 virtual VPSingleDefRecipe *clone() override = 0;
957
958 /// Returns the underlying instruction.
960 return cast<Instruction>(getUnderlyingValue());
961 }
963 return cast<Instruction>(getUnderlyingValue());
964 }
965};
966
967/// Class to record the LLVM IR flags of a recipe alongside the recipe itself.
969 enum class OperationType : unsigned char {
970 Cmp,
971 OverflowingBinOp,
972 DisjointOp,
973 PossiblyExactOp,
974 GEPOp,
975 FPMathOp,
976 NonNegOp,
977 Other
978 };
979
980public:
981 struct WrapFlagsTy {
982 char HasNUW : 1;
983 char HasNSW : 1;
984
986 };
987
989 char IsDisjoint : 1;
991 };
992
993protected:
994 struct GEPFlagsTy {
995 char IsInBounds : 1;
997 };
998
999private:
1000 struct ExactFlagsTy {
1001 char IsExact : 1;
1002 };
1003 struct NonNegFlagsTy {
1004 char NonNeg : 1;
1005 };
1006 struct FastMathFlagsTy {
1007 char AllowReassoc : 1;
1008 char NoNaNs : 1;
1009 char NoInfs : 1;
1010 char NoSignedZeros : 1;
1011 char AllowReciprocal : 1;
1012 char AllowContract : 1;
1013 char ApproxFunc : 1;
1014
1015 FastMathFlagsTy(const FastMathFlags &FMF);
1016 };
1017
1018 OperationType OpType;
1019
1020 union {
1024 ExactFlagsTy ExactFlags;
1026 NonNegFlagsTy NonNegFlags;
1027 FastMathFlagsTy FMFs;
1028 unsigned AllFlags;
1029 };
1030
1031protected:
1033 OpType = Other.OpType;
1034 AllFlags = Other.AllFlags;
1035 }
1036
1037public:
/// Create a recipe that carries no IR flags: the operation type is set to
/// Other and all flag bits are cleared.
1038 template <typename IterT>
1039 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
1040 : VPSingleDefRecipe(SC, Operands, DL) {
1041 OpType = OperationType::Other;
1042 AllFlags = 0;
1043 }
1044
/// Create a recipe whose flags are read from the IR instruction \p I. The
/// dyn_cast chain below is evaluated top to bottom and the first match decides
/// which flag group is recorded, so the order of the checks is significant for
/// instructions matching more than one operator class. If nothing matches, the
/// operation type is Other and all flag bits are cleared.
1045 template <typename IterT>
1046 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
1048 if (auto *Op = dyn_cast<CmpInst>(&I)) {
1049 OpType = OperationType::Cmp;
1050 CmpPredicate = Op->getPredicate();
1051 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1052 OpType = OperationType::DisjointOp;
1053 DisjointFlags.IsDisjoint = Op->isDisjoint();
1054 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1055 OpType = OperationType::OverflowingBinOp;
1056 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1057 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1058 OpType = OperationType::PossiblyExactOp;
1059 ExactFlags.IsExact = Op->isExact();
1060 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1061 OpType = OperationType::GEPOp;
1062 GEPFlags.IsInBounds = GEP->isInBounds();
1063 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1064 OpType = OperationType::NonNegOp;
1065 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1066 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1067 OpType = OperationType::FPMathOp;
1068 FMFs = Op->getFastMathFlags();
1069 } else {
 // No flag-carrying operator class matched: record no flags at all.
1070 OpType = OperationType::Other;
1071 AllFlags = 0;
1072 }
1073 }
1074
1075 template <typename IterT>
1076 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1077 CmpInst::Predicate Pred, DebugLoc DL = {})
1078 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1079 CmpPredicate(Pred) {}
1080
1081 template <typename IterT>
1082 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1084 : VPSingleDefRecipe(SC, Operands, DL),
1085 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1086
1087 template <typename IterT>
1088 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1089 FastMathFlags FMFs, DebugLoc DL = {})
1090 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1091 FMFs(FMFs) {}
1092
1093 template <typename IterT>
1094 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1096 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1098
1099protected:
1100 template <typename IterT>
1101 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1102 GEPFlagsTy GEPFlags, DebugLoc DL = {})
1103 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1104 GEPFlags(GEPFlags) {}
1105
1106public:
/// Type inquiry on a recipe: true for the VPInstruction, VPWiden, VPWidenGEP,
/// VPWidenCast, VPReplicate and VPVectorPointer recipe IDs.
1107 static inline bool classof(const VPRecipeBase *R) {
1108 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1109 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1110 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1111 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1112 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1113 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1114 }
1115
1116 static inline bool classof(const VPUser *U) {
1117 auto *R = dyn_cast<VPRecipeBase>(U);
1118 return R && classof(R);
1119 }
1120
1121 /// Drop all poison-generating flags. Which flags are cleared depends on the
 /// recorded operation type; nothing is changed for Cmp and Other.
1123 // NOTE: This needs to be kept in-sync with
1124 // Instruction::dropPoisonGeneratingFlags.
1125 switch (OpType) {
1126 case OperationType::OverflowingBinOp:
1127 WrapFlags.HasNUW = false;
1128 WrapFlags.HasNSW = false;
1129 break;
1130 case OperationType::DisjointOp:
1131 DisjointFlags.IsDisjoint = false;
1132 break;
1133 case OperationType::PossiblyExactOp:
1134 ExactFlags.IsExact = false;
1135 break;
1136 case OperationType::GEPOp:
1137 GEPFlags.IsInBounds = false;
1138 break;
1139 case OperationType::FPMathOp:
 // Only the no-NaNs and no-Infs bits are cleared; the remaining fast-math
 // bits are left as they are.
1140 FMFs.NoNaNs = false;
1141 FMFs.NoInfs = false;
1142 break;
1143 case OperationType::NonNegOp:
1144 NonNegFlags.NonNeg = false;
1145 break;
1146 case OperationType::Cmp:
1147 case OperationType::Other:
1148 break;
1149 }
1150 }
1151
1152 /// Set the IR flags for \p I. The flag group recorded for this recipe's
 /// operation type is applied to \p I; nothing is set for Cmp and Other.
1153 void setFlags(Instruction *I) const {
1154 switch (OpType) {
1155 case OperationType::OverflowingBinOp:
1156 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1157 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1158 break;
1159 case OperationType::DisjointOp:
 // \p I is cast unconditionally, so it must be a PossiblyDisjointInst here.
1160 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1161 break;
1162 case OperationType::PossiblyExactOp:
1163 I->setIsExact(ExactFlags.IsExact);
1164 break;
1165 case OperationType::GEPOp:
1166 // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
1167 cast<GetElementPtrInst>(I)->setNoWrapFlags(
1170 break;
1171 case OperationType::FPMathOp:
 // Each recorded fast-math bit is copied to \p I individually.
1172 I->setHasAllowReassoc(FMFs.AllowReassoc);
1173 I->setHasNoNaNs(FMFs.NoNaNs);
1174 I->setHasNoInfs(FMFs.NoInfs);
1175 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1176 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1177 I->setHasAllowContract(FMFs.AllowContract);
1178 I->setHasApproxFunc(FMFs.ApproxFunc);
1179 break;
1180 case OperationType::NonNegOp:
1181 I->setNonNeg(NonNegFlags.NonNeg);
1182 break;
1183 case OperationType::Cmp:
1184 case OperationType::Other:
1185 break;
1186 }
1187 }
1188
1190 assert(OpType == OperationType::Cmp &&
1191 "recipe doesn't have a compare predicate");
1192 return CmpPredicate;
1193 }
1194
/// Returns the recorded inbounds flag. Asserts that the recipe's operation
/// type is GEPOp.
1195 bool isInBounds() const {
1196 assert(OpType == OperationType::GEPOp &&
1197 "recipe doesn't have inbounds flag");
1198 return GEPFlags.IsInBounds;
1199 }
1200
1201 /// Returns true if the recipe has fast-math flags.
1202 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1203
1205
/// Returns the recorded NUW flag. Asserts that the recipe's operation type is
/// OverflowingBinOp.
1206 bool hasNoUnsignedWrap() const {
1207 assert(OpType == OperationType::OverflowingBinOp &&
1208 "recipe doesn't have a NUW flag");
1209 return WrapFlags.HasNUW;
1210 }
1211
/// Returns the recorded NSW flag. Asserts that the recipe's operation type is
/// OverflowingBinOp.
1212 bool hasNoSignedWrap() const {
1213 assert(OpType == OperationType::OverflowingBinOp &&
1214 "recipe doesn't have a NSW flag");
1215 return WrapFlags.HasNSW;
1216 }
1217
/// Query for the disjoint flag. Asserts that the recipe's operation type is
/// DisjointOp, i.e. that a disjoint flag was recorded for it.
1218 bool isDisjoint() const {
1219 assert(OpType == OperationType::DisjointOp &&
1220 "recipe cannot have a disjoint flag");
1222 }
1223
1224#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1225 void printFlags(raw_ostream &O) const;
1226#endif
1227};
1228
1229/// This is a concrete Recipe that models a single VPlan-level instruction.
1230/// While as any Recipe it may generate a sequence of IR instructions when
1231/// executed, these instructions would always form a single-def expression as
1232/// the VPInstruction is also a single def-use vertex.
1234 friend class VPlanSlp;
1235
1236public:
1237 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1238 enum {
1240 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1241 // values of a first-order recurrence.
1247 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1248 /// The first operand is the incoming value from the predecessor in VPlan,
1249 /// the second operand is the incoming value for all other predecessors
1250 /// (which are currently not modeled in VPlan).
1253 // Increment the canonical IV separately for each unrolled part.
1258 // Takes the VPValue to extract from as first operand and the lane or part
1259 // to extract as second operand, counting from the end starting with 1 for
1260 // last. The second operand must be a positive constant and <= VF when
1261 // extracting from a vector or <= UF when extracting from an unrolled
1262 // scalar.
1264 LogicalAnd, // Non-poison propagating logical And.
1265 // Add an offset in bytes (second operand) to a base pointer (first
1266 // operand). Only generates scalar values (either for the first lane only or
1267 // for all lanes, depending on its uses).
1269 };
1270
1271private:
1272 typedef unsigned char OpcodeTy;
1273 OpcodeTy Opcode;
1274
1275 /// An optional name that can be used for the generated IR instruction.
1276 const std::string Name;
1277
1278 /// Returns true if this VPInstruction generates scalar values for all lanes.
1279 /// Most VPInstructions generate a single value per part, either vector or
1280 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1281 /// values per all lanes, stemming from an original ingredient. This method
1282 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1283 /// underlying ingredient.
1284 bool doesGeneratePerAllLanes() const;
1285
1286 /// Returns true if we can generate a scalar for the first lane only if
1287 /// needed.
1288 bool canGenerateScalarForFirstLane() const;
1289
1290 /// Utility methods serving execute(): generates a single instance of the
1291 /// modeled instruction for a given part. \returns the generated value for \p
1292 /// Part. In some cases an existing value is returned rather than a generated
1293 /// one.
1294 Value *generatePerPart(VPTransformState &State, unsigned Part);
1295
1296 /// Utility methods serving execute(): generates a scalar single instance of
1297 /// the modeled instruction for a given lane. \returns the scalar generated
1298 /// value for lane \p Lane.
1299 Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
1300
1301#if !defined(NDEBUG)
1302 /// Return true if the VPInstruction is a floating point math operation, i.e.
1303 /// has fast-math flags.
1304 bool isFPMathOp() const;
1305#endif
1306
1307public:
1309 const Twine &Name = "")
1310 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1311 Opcode(Opcode), Name(Name.str()) {}
1312
1313 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1314 DebugLoc DL = {}, const Twine &Name = "")
1316
1317 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1318 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1319
1320 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1321 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1322 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1323 Opcode(Opcode), Name(Name.str()) {}
1324
1325 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1326 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1327 const Twine &Name = "")
1328 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1329 Opcode(Opcode), Name(Name.str()) {
1330 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1331 }
1332
1333 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1334 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1335
1336 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1337
1338 VPInstruction *clone() override {
1340 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1341 New->transferFlags(*this);
1342 return New;
1343 }
1344
1345 unsigned getOpcode() const { return Opcode; }
1346
1347 /// Generate the instruction.
1348 /// TODO: We currently execute only per-part unless a specific instance is
1349 /// provided.
1350 void execute(VPTransformState &State) override;
1351
1352#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1353 /// Print the VPInstruction to \p O.
1354 void print(raw_ostream &O, const Twine &Indent,
1355 VPSlotTracker &SlotTracker) const override;
1356
1357 /// Print the VPInstruction to dbgs() (for debugging).
1358 LLVM_DUMP_METHOD void dump() const;
1359#endif
1360
1361 /// Return true if this instruction may modify memory. The answer is derived
 /// from the opcode alone: Store, Call, Invoke and SLPStore are treated as
 /// possibly writing; every other opcode is not.
1362 bool mayWriteToMemory() const {
1363 // TODO: we can use attributes of the called function to rule out memory
1364 // modifications.
1365 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
1366 Opcode == Instruction::Invoke || Opcode == SLPStore;
1367 }
1368
/// Returns false for the opcodes enumerated in the switch below and true for
/// every other opcode.
1369 bool hasResult() const {
1370 // CallInst may or may not have a result, depending on the called function.
1371 // Conservatively return calls have results for now.
1372 switch (getOpcode()) {
1373 case Instruction::Ret:
1374 case Instruction::Br:
1375 case Instruction::Store:
1376 case Instruction::Switch:
1377 case Instruction::IndirectBr:
1378 case Instruction::Resume:
1379 case Instruction::CatchRet:
1380 case Instruction::Unreachable:
1381 case Instruction::Fence:
1382 case Instruction::AtomicRMW:
1385 return false;
1386 default:
1387 return true;
1388 }
1389 }
1390
1391 /// Returns true if the recipe only uses the first lane of operand \p Op.
1392 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1393
1394 /// Returns true if the recipe only uses the first part of operand \p Op.
1395 bool onlyFirstPartUsed(const VPValue *Op) const override;
1396
1397 /// Returns true if this VPInstruction produces a scalar value from a vector,
1398 /// e.g. by performing a reduction or extracting a lane.
1399 bool isVectorToScalar() const;
1400
1401 /// Returns true if this VPInstruction's operands are single scalars and the
1402 /// result is also a single scalar.
1403 bool isSingleScalar() const;
1404};
1405
1406/// VPWidenRecipe is a recipe for producing a widened instruction using the
1407/// opcode and operands of the recipe. This recipe covers most of the
1408/// traditional vectorization cases where each recipe transforms into a
1409/// vectorized version of itself.
1411 unsigned Opcode;
1412
1413public:
1414 template <typename IterT>
1416 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1417 Opcode(I.getOpcode()) {}
1418
1419 ~VPWidenRecipe() override = default;
1420
/// Clone the recipe: a new VPWidenRecipe is built from the underlying
/// instruction and the current operands, then this recipe's flags are copied
/// onto it with transferFlags().
1421 VPWidenRecipe *clone() override {
1422 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1423 R->transferFlags(*this);
1424 return R;
1425 }
1426
1427 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1428
1429 /// Produce a widened instruction using the opcode and operands of the recipe,
1430 /// processing State.VF elements.
1431 void execute(VPTransformState &State) override;
1432
1433 /// Return the cost of this VPWidenRecipe.
1435 VPCostContext &Ctx) const override;
1436
1437 unsigned getOpcode() const { return Opcode; }
1438
1439#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1440 /// Print the recipe.
1441 void print(raw_ostream &O, const Twine &Indent,
1442 VPSlotTracker &SlotTracker) const override;
1443#endif
1444};
1445
1446/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1448 /// Cast instruction opcode.
1449 Instruction::CastOps Opcode;
1450
1451 /// Result type for the cast.
1452 Type *ResultTy;
1453
1454public:
1456 CastInst &UI)
1457 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1458 ResultTy(ResultTy) {
1459 assert(UI.getOpcode() == Opcode &&
1460 "opcode of underlying cast doesn't match");
1461 }
1462
1464 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1465 ResultTy(ResultTy) {}
1466
1467 ~VPWidenCastRecipe() override = default;
1468
1470 if (auto *UV = getUnderlyingValue())
1471 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1472 *cast<CastInst>(UV));
1473
1474 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1475 }
1476
1477 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1478
1479 /// Produce widened copies of the cast.
1480 void execute(VPTransformState &State) override;
1481
1482#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1483 /// Print the recipe.
1484 void print(raw_ostream &O, const Twine &Indent,
1485 VPSlotTracker &SlotTracker) const override;
1486#endif
1487
1488 Instruction::CastOps getOpcode() const { return Opcode; }
1489
1490 /// Returns the result type of the cast.
1491 Type *getResultType() const { return ResultTy; }
1492};
1493
1494/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1496 Instruction::CastOps Opcode;
1497
1498 Type *ResultTy;
1499
1500 Value *generate(VPTransformState &State, unsigned Part);
1501
1502public:
1504 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1505 ResultTy(ResultTy) {}
1506
1507 ~VPScalarCastRecipe() override = default;
1508
1510 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1511 }
1512
1513 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1514
1515 void execute(VPTransformState &State) override;
1516
1517#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1518 void print(raw_ostream &O, const Twine &Indent,
1519 VPSlotTracker &SlotTracker) const override;
1520#endif
1521
1522 /// Returns the result type of the cast.
1523 Type *getResultType() const { return ResultTy; }
1524
1525 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1526 // At the moment, only uniform codegen is implemented.
1528 "Op must be an operand of the recipe");
1529 return true;
1530 }
1531};
1532
1533/// A recipe for widening Call instructions.
1535 /// ID of the vector intrinsic to call when widening the call. If set to
1536 /// Intrinsic::not_intrinsic, a library call will be used instead.
1537 Intrinsic::ID VectorIntrinsicID;
1538 /// If this recipe represents a library call, Variant stores a pointer to
1539 /// the chosen function. There is a 1:1 mapping between a given VF and the
1540 /// chosen vectorized variant, so there will be a different vplan for each
1541 /// VF with a valid variant.
1542 Function *Variant;
1543
1544public:
1545 template <typename IterT>
1547 Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1548 Function *Variant = nullptr)
1549 : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
1550 VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
1551 assert(
1552 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1553 "last operand must be the called function");
1554 }
1555
1556 ~VPWidenCallRecipe() override = default;
1557
1560 VectorIntrinsicID, getDebugLoc(), Variant);
1561 }
1562
1563 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1564
1565 /// Produce a widened version of the call instruction.
1566 void execute(VPTransformState &State) override;
1567
1569 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1570 }
1571
1573 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1574 }
1576 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1577 }
1578
1579#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1580 /// Print the recipe.
1581 void print(raw_ostream &O, const Twine &Indent,
1582 VPSlotTracker &SlotTracker) const override;
1583#endif
1584};
1585
1586/// A recipe for widening select instructions.
1588 template <typename IterT>
1590 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1591 I.getDebugLoc()) {}
1592
1593 ~VPWidenSelectRecipe() override = default;
1594
1596 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1597 operands());
1598 }
1599
1600 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1601
1602 /// Produce a widened version of the select instruction.
1603 void execute(VPTransformState &State) override;
1604
1605#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1606 /// Print the recipe.
1607 void print(raw_ostream &O, const Twine &Indent,
1608 VPSlotTracker &SlotTracker) const override;
1609#endif
1610
/// Returns the condition of the select, which is operand 0 of the recipe.
1611 VPValue *getCond() const {
1612 return getOperand(0);
1613 }
1614
1615 bool isInvariantCond() const {
1617 }
1618};
1619
1620/// A recipe for handling GEP instructions.
1622 bool isPointerLoopInvariant() const {
1624 }
1625
1626 bool isIndexLoopInvariant(unsigned I) const {
1628 }
1629
/// Returns true if isDefinedOutsideVectorRegions() holds for every operand of
/// the recipe.
1630 bool areAllOperandsInvariant() const {
1631 return all_of(operands(), [](VPValue *Op) {
1632 return Op->isDefinedOutsideVectorRegions();
1633 });
1634 }
1635
1636public:
1637 template <typename IterT>
1639 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1640
1641 ~VPWidenGEPRecipe() override = default;
1642
1644 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1645 operands());
1646 }
1647
1648 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1649
1650 /// Generate the gep nodes.
1651 void execute(VPTransformState &State) override;
1652
1653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1654 /// Print the recipe.
1655 void print(raw_ostream &O, const Twine &Indent,
1656 VPSlotTracker &SlotTracker) const override;
1657#endif
1658};
1659
1660/// A recipe to compute the pointers for widened memory accesses of IndexedTy for
1661/// all parts. If IsReverse is true, compute pointers for accessing the input in
1662/// reverse order per part.
1664 Type *IndexedTy;
1665 bool IsReverse;
1666
1667public:
/// Create a recipe with \p Ptr as its only operand. \p IsInBounds is passed
/// to the base class as the recipe's GEP flag; \p IndexedTy and \p IsReverse
/// are stored in the recipe.
1668 VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1669 bool IsInBounds, DebugLoc DL)
1670 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1671 GEPFlagsTy(IsInBounds), DL),
1672 IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1673
1674 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1675
1676 void execute(VPTransformState &State) override;
1677
1678 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1680 "Op must be an operand of the recipe");
1681 return true;
1682 }
1683
1684 /// Returns true if the recipe only uses the first part of operand \p Op.
1685 bool onlyFirstPartUsed(const VPValue *Op) const override {
1687 "Op must be an operand of the recipe");
1688 assert(getNumOperands() == 1 && "must have a single operand");
1689 return true;
1690 }
1691
1693 return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1694 isInBounds(), getDebugLoc());
1695 }
1696
1697#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1698 /// Print the recipe.
1699 void print(raw_ostream &O, const Twine &Indent,
1700 VPSlotTracker &SlotTracker) const override;
1701#endif
1702};
1703
1704/// A pure virtual base class for all recipes modeling header phis, including
1705/// phis for first order recurrences, pointer inductions and reductions. The
1706/// start value is the first operand of the recipe and the incoming value from
1707/// the backedge is the second operand.
1708///
1709/// Inductions are modeled using the following sub-classes:
1710/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1711/// starting at a specified value (zero for the main vector loop, the resume
1712/// value for the epilogue vector loop) and stepping by 1. The induction
1713/// controls exiting of the vector loop by comparing against the vector trip
1714/// count. Produces a single scalar PHI for the induction value per
1715/// iteration.
1716/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1717/// floating point inductions with arbitrary start and step values. Produces
1718/// a vector PHI per-part.
1719/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1720/// value of an IV with different start and step values. Produces a single
1721/// scalar value per iteration
1722/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1723/// canonical or derived induction.
1724/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1725/// pointer induction. Produces either a vector PHI per-part or scalar values
1726/// per-lane based on the canonical induction.
1728protected:
/// Create a header phi recipe with ID \p VPDefID for \p UnderlyingInstr. The
/// recipe is constructed with an empty operand list; if \p Start is non-null
/// it is then added as the first operand.
1729 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1730 VPValue *Start = nullptr, DebugLoc DL = {})
1731 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
1732 if (Start)
1733 addOperand(Start);
1734 }
1735
1736public:
1737 ~VPHeaderPHIRecipe() override = default;
1738
1739 /// Method to support type inquiry through isa, cast, and dyn_cast. A recipe
 /// is a header phi if its ID lies in the inclusive range
 /// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
1740 static inline bool classof(const VPRecipeBase *B) {
1741 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1742 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1743 }
 /// Same range check for a VPValue, applied to its defining recipe; returns
 /// false if the value has no defining recipe.
1744 static inline bool classof(const VPValue *V) {
1745 auto *B = V->getDefiningRecipe();
1746 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1747 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1748 }
1749
1750 /// Generate the phi nodes.
1751 void execute(VPTransformState &State) override = 0;
1752
1753#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1754 /// Print the recipe.
1755 void print(raw_ostream &O, const Twine &Indent,
1756 VPSlotTracker &SlotTracker) const override = 0;
1757#endif
1758
1759 /// Returns the start value of the phi, if one is set.
1761 return getNumOperands() == 0 ? nullptr : getOperand(0);
1762 }
1764 return getNumOperands() == 0 ? nullptr : getOperand(0);
1765 }
1766
1767 /// Update the start value of the recipe.
1769
1770 /// Returns the incoming value from the loop backedge.
1772 return getOperand(1);
1773 }
1774
1775 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1776 /// to be a recipe.
1779 }
1780};
1781
1782/// A recipe for handling phi nodes of integer and floating-point inductions,
1783/// producing their vector values.
1785 PHINode *IV;
1786 TruncInst *Trunc;
1787 const InductionDescriptor &IndDesc;
1788
1789public:
1791 const InductionDescriptor &IndDesc)
1792 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
1793 Trunc(nullptr), IndDesc(IndDesc) {
1794 addOperand(Step);
1795 }
1796
1798 const InductionDescriptor &IndDesc,
1799 TruncInst *Trunc)
1800 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
1801 IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
1802 addOperand(Step);
1803 }
1804
1806
1809 getStepValue(), IndDesc, Trunc);
1810 }
1811
1812 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1813
1814 /// Generate the vectorized and scalarized versions of the phi node as
1815 /// needed by their users.
1816 void execute(VPTransformState &State) override;
1817
1818#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1819 /// Print the recipe.
1820 void print(raw_ostream &O, const Twine &Indent,
1821 VPSlotTracker &SlotTracker) const override;
1822#endif
1823
1825 // TODO: All operands of base recipe must exist and be at same index in
1826 // derived recipe.
1828 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1829 }
1830
1832 // TODO: All operands of base recipe must exist and be at same index in
1833 // derived recipe.
1835 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1836 }
1837
1838 /// Returns the step value of the induction.
1840 const VPValue *getStepValue() const { return getOperand(1); }
1841
1842 /// Returns the first defined value as TruncInst, if it is one or nullptr
1843 /// otherwise.
1844 TruncInst *getTruncInst() { return Trunc; }
1845 const TruncInst *getTruncInst() const { return Trunc; }
1846
1847 PHINode *getPHINode() { return IV; }
1848
1849 /// Returns the induction descriptor for the recipe.
1850 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1851
1852 /// Returns true if the induction is canonical, i.e. starting at 0 and
1853 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
1854 /// same type as the canonical induction.
1855 bool isCanonical() const;
1856
1857 /// Returns the scalar type of the induction.
1859 return Trunc ? Trunc->getType() : IV->getType();
1860 }
1861};
1862
1864 const InductionDescriptor &IndDesc;
1865
1866 bool IsScalarAfterVectorization;
1867
1868public:
1869 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1870 /// Start.
1872 const InductionDescriptor &IndDesc,
1873 bool IsScalarAfterVectorization)
1874 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1875 IndDesc(IndDesc),
1876 IsScalarAfterVectorization(IsScalarAfterVectorization) {
1877 addOperand(Start);
1878 addOperand(Step);
1879 }
1880
1882
1885 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
1886 IndDesc, IsScalarAfterVectorization);
1887 }
1888
1889 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1890
1891 /// Generate vector values for the pointer induction.
1892 void execute(VPTransformState &State) override;
1893
1894 /// Returns true if only scalar values will be generated.
1895 bool onlyScalarsGenerated(bool IsScalable);
1896
1897 /// Returns the induction descriptor for the recipe.
1898 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1899
1900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1901 /// Print the recipe.
1902 void print(raw_ostream &O, const Twine &Indent,
1903 VPSlotTracker &SlotTracker) const override;
1904#endif
1905};
1906
1907/// A recipe for handling phis that are widened in the vector loop.
1908/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1909/// managed in the recipe directly.
1911 /// List of incoming blocks. Only used in the VPlan native path.
1912 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1913
1914public:
1915 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
 /// \p Start is optional: the recipe is built with no operands and \p Start is
 /// only added as an operand when it is non-null.
1916 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1917 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
1918 if (Start)
1919 addOperand(Start);
1920 }
1921
1923 llvm_unreachable("cloning not implemented yet");
1924 }
1925
1926 ~VPWidenPHIRecipe() override = default;
1927
1928 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1929
1930 /// Generate the phi/select nodes.
1931 void execute(VPTransformState &State) override;
1932
1933#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1934 /// Print the recipe.
1935 void print(raw_ostream &O, const Twine &Indent,
1936 VPSlotTracker &SlotTracker) const override;
1937#endif
1938
1939 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi. \p IncomingV is
 /// appended to the recipe's operands and \p IncomingBlock to IncomingBlocks.
1940 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1941 addOperand(IncomingV);
1942 IncomingBlocks.push_back(IncomingBlock);
1943 }
1944
1945 /// Returns the \p I th incoming VPBasicBlock.
1946 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1947
1948 /// Returns the \p I th incoming VPValue.
1949 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
1950};
1951
1952/// A recipe for handling first-order recurrence phis. The start value is the
1953/// first operand of the recipe and the incoming value from the backedge is the
1954/// second operand.
1957 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1958
1959 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1960
1962 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1963 }
1964
1967 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
1968 }
1969
1970 void execute(VPTransformState &State) override;
1971
1972#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1973 /// Print the recipe.
1974 void print(raw_ostream &O, const Twine &Indent,
1975 VPSlotTracker &SlotTracker) const override;
1976#endif
1977};
1978
1979/// A recipe for handling reduction phis. The start value is the first operand
1980/// of the recipe and the incoming value from the backedge is the second
1981/// operand.
1983 /// Descriptor for the reduction.
1984 const RecurrenceDescriptor &RdxDesc;
1985
1986 /// The phi is part of an in-loop reduction.
1987 bool IsInLoop;
1988
1989 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1990 bool IsOrdered;
1991
1992public:
1993 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1994 /// RdxDesc.
1996 VPValue &Start, bool IsInLoop = false,
1997 bool IsOrdered = false)
1998 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1999 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
2000 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2001 }
2002
2003 ~VPReductionPHIRecipe() override = default;
2004
2006 auto *R =
2007 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
2008 *getOperand(0), IsInLoop, IsOrdered);
2009 R->addOperand(getBackedgeValue());
2010 return R;
2011 }
2012
2013 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2014
2016 return R->getVPDefID() == VPDef::VPReductionPHISC;
2017 }
2018
2019 /// Generate the phi/select nodes.
2020 void execute(VPTransformState &State) override;
2021
2022#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2023 /// Print the recipe.
2024 void print(raw_ostream &O, const Twine &Indent,
2025 VPSlotTracker &SlotTracker) const override;
2026#endif
2027
2029 return RdxDesc;
2030 }
2031
2032 /// Returns true, if the phi is part of an ordered reduction.
2033 bool isOrdered() const { return IsOrdered; }
2034
2035 /// Returns true, if the phi is part of an in-loop reduction.
2036 bool isInLoop() const { return IsInLoop; }
2037};
2038
2039/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2040/// instructions.
2042public:
2043 /// The blend operation is a User of the incoming values and of their
2044 /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
2045 /// incoming value does not have a mask associated.
2047 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2048 assert((Operands.size() + 1) % 2 == 0 &&
2049 "Expected an odd number of operands");
2050 }
2051
2052 VPBlendRecipe *clone() override {
2054 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2055 }
2056
2057 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2058
2059 /// Return the number of incoming values, taking into account that the first
2060 /// incoming value has no mask.
2061 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
2062
2063 /// Return incoming value number \p Idx.
2064 VPValue *getIncomingValue(unsigned Idx) const {
2065 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
2066 }
2067
2068 /// Return mask number \p Idx.
2069 VPValue *getMask(unsigned Idx) const {
2070 assert(Idx > 0 && "First index has no mask associated.");
2071 return getOperand(Idx * 2);
2072 }
2073
2074 /// Generate the phi/select nodes.
2075 void execute(VPTransformState &State) override;
2076
2077#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2078 /// Print the recipe.
2079 void print(raw_ostream &O, const Twine &Indent,
2080 VPSlotTracker &SlotTracker) const override;
2081#endif
2082
2083 /// Returns true if the recipe only uses the first lane of operand \p Op.
2084 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2086 "Op must be an operand of the recipe");
2087 // Recursing through Blend recipes only, must terminate at header phis at
2088 // the latest.
2089 return all_of(users(),
2090 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2091 }
2092};
2093
2094/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2095/// or stores into one wide load/store and shuffles. The first operand of a
2096/// VPInterleave recipe is the address, followed by the stored values, followed
2097/// by an optional mask.
2100
2101 /// Indicates if the interleave group is in a conditional block and requires a
2102 /// mask.
2103 bool HasMask = false;
2104
2105 /// Indicates if gaps between members of the group need to be masked out or if
2106 /// unused gaps can be loaded speculatively.
2107 bool NeedsMaskForGaps = false;
2108
2109public:
2111 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2112 bool NeedsMaskForGaps)
2113 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2114 NeedsMaskForGaps(NeedsMaskForGaps) {
2115 for (unsigned i = 0; i < IG->getFactor(); ++i)
2116 if (Instruction *I = IG->getMember(i)) {
2117 if (I->getType()->isVoidTy())
2118 continue;
2119 new VPValue(I, this);
2120 }
2121
2122 for (auto *SV : StoredValues)
2123 addOperand(SV);
2124 if (Mask) {
2125 HasMask = true;
2126 addOperand(Mask);
2127 }
2128 }
2129 ~VPInterleaveRecipe() override = default;
2130
2132 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2133 NeedsMaskForGaps);
2134 }
2135
2136 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2137
2138 /// Return the address accessed by this recipe.
2139 VPValue *getAddr() const {
2140 return getOperand(0); // Address is the 1st, mandatory operand.
2141 }
2142
2143 /// Return the mask used by this recipe. Note that a full mask is represented
2144 /// by a nullptr.
2145 VPValue *getMask() const {
2146 // Mask is optional and therefore the last operand.
2147 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2148 }
2149
2150 /// Return the VPValues stored by this interleave group. If it is a load
2151 /// interleave group, return an empty ArrayRef.
2153 // The first operand is the address, followed by the stored values, followed
2154 // by an optional mask.
2157 }
2158
2159 /// Generate the wide load or store, and shuffles.
2160 void execute(VPTransformState &State) override;
2161
2162#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2163 /// Print the recipe.
2164 void print(raw_ostream &O, const Twine &Indent,
2165 VPSlotTracker &SlotTracker) const override;
2166#endif
2167
2169
2170 /// Returns the number of stored operands of this interleave group. Returns 0
2171 /// for load interleave groups.
2172 unsigned getNumStoreOperands() const {
2173 return getNumOperands() - (HasMask ? 2 : 1);
2174 }
2175
2176 /// The recipe only uses the first lane of the address.
2177 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2179 "Op must be an operand of the recipe");
2180 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2181 }
2182
2183 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2184};
2185
2186/// A recipe to represent inloop reduction operations, performing a reduction on
2187/// a vector operand into a scalar value, and adding the result to a chain.
2188/// The Operands are {ChainOp, VecOp, [Condition]}.
2190 /// The recurrence descriptor for the reduction in question.
2191 const RecurrenceDescriptor &RdxDesc;
2192 bool IsOrdered;
2193 /// Whether the reduction is conditional.
2194 bool IsConditional = false;
2195
2196protected:
2197 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2199 VPValue *CondOp, bool IsOrdered)
2200 : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
2201 if (CondOp) {
2202 IsConditional = true;
2203 addOperand(CondOp);
2204 }
2205 }
2206
2207public:
2209 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2210 bool IsOrdered)
2211 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2212 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2213 IsOrdered) {}
2214
2215 ~VPReductionRecipe() override = default;
2216
2218 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2219 getVecOp(), getCondOp(), IsOrdered);
2220 }
2221
2222 static inline bool classof(const VPRecipeBase *R) {
2223 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2224 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2225 }
2226
2227 static inline bool classof(const VPUser *U) {
2228 auto *R = dyn_cast<VPRecipeBase>(U);
2229 return R && classof(R);
2230 }
2231
2232 /// Generate the reduction in the loop
2233 void execute(VPTransformState &State) override;
2234
2235#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2236 /// Print the recipe.
2237 void print(raw_ostream &O, const Twine &Indent,
2238 VPSlotTracker &SlotTracker) const override;
2239#endif
2240
2241 /// Return the recurrence descriptor for the in-loop reduction.
2243 return RdxDesc;
2244 }
2245 /// Return true if the in-loop reduction is ordered.
2246 bool isOrdered() const { return IsOrdered; };
2247 /// Return true if the in-loop reduction is conditional.
2248 bool isConditional() const { return IsConditional; };
2249 /// The VPValue of the scalar Chain being accumulated.
2250 VPValue *getChainOp() const { return getOperand(0); }
2251 /// The VPValue of the vector value to be reduced.
2252 VPValue *getVecOp() const { return getOperand(1); }
2253 /// The VPValue of the condition for the block.
2255 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2256 }
2257};
2258
2259/// A recipe to represent inloop reduction operations with vector-predication
2260/// intrinsics, performing a reduction on a vector operand with the explicit
2261/// vector length (EVL) into a scalar value, and adding the result to a chain.
2262/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2264public:
2267 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2269 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2270 R.isOrdered()) {}
2271
2272 ~VPReductionEVLRecipe() override = default;
2273
2275 llvm_unreachable("cloning not implemented yet");
2276 }
2277
2278 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2279
2280 /// Generate the reduction in the loop
2281 void execute(VPTransformState &State) override;
2282
2283#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2284 /// Print the recipe.
2285 void print(raw_ostream &O, const Twine &Indent,
2286 VPSlotTracker &SlotTracker) const override;
2287#endif
2288
2289 /// The VPValue of the explicit vector length.
2290 VPValue *getEVL() const { return getOperand(2); }
2291
2292 /// Returns true if the recipe only uses the first lane of operand \p Op.
2293 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2295 "Op must be an operand of the recipe");
2296 return Op == getEVL();
2297 }
2298};
2299
2300/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2301/// copies of the original scalar type, one per lane, instead of producing a
2302/// single copy of widened type for all lanes. If the instruction is known to be
2303/// uniform only one copy, per lane zero, will be generated.
2305 /// Indicator if only a single replica per lane is needed.
2306 bool IsUniform;
2307
2308 /// Indicator if the replicas are also predicated.
2309 bool IsPredicated;
2310
2311public:
2312 template <typename IterT>
2314 bool IsUniform, VPValue *Mask = nullptr)
2315 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2316 IsUniform(IsUniform), IsPredicated(Mask) {
2317 if (Mask)
2318 addOperand(Mask);
2319 }
2320
2321 ~VPReplicateRecipe() override = default;
2322
2324 auto *Copy =
2325 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2326 isPredicated() ? getMask() : nullptr);
2327 Copy->transferFlags(*this);
2328 return Copy;
2329 }
2330
2331 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2332
2333 /// Generate replicas of the desired Ingredient. Replicas will be generated
2334 /// for all parts and lanes unless a specific part and lane are specified in
2335 /// the \p State.
2336 void execute(VPTransformState &State) override;
2337
2338#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2339 /// Print the recipe.
2340 void print(raw_ostream &O, const Twine &Indent,
2341 VPSlotTracker &SlotTracker) const override;
2342#endif
2343
2344 bool isUniform() const { return IsUniform; }
2345
2346 bool isPredicated() const { return IsPredicated; }
2347
2348 /// Returns true if the recipe only uses the first lane of operand \p Op.
2349 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2351 "Op must be an operand of the recipe");
2352 return isUniform();
2353 }
2354
2355 /// Returns true if the recipe uses scalars of operand \p Op.
2356 bool usesScalars(const VPValue *Op) const override {
2358 "Op must be an operand of the recipe");
2359 return true;
2360 }
2361
2362 /// Returns true if the recipe is used by a widened recipe via an intervening
2363 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2364 /// in a vector.
2365 bool shouldPack() const;
2366
2367 /// Return the mask of a predicated VPReplicateRecipe.
2369 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2370 return getOperand(getNumOperands() - 1);
2371 }
2372
2373 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2374};
2375
2376/// A recipe for generating conditional branches on the bits of a mask.
2378public:
2380 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2381 if (BlockInMask) // nullptr means all-one mask.
2382 addOperand(BlockInMask);
2383 }
2384
2386 return new VPBranchOnMaskRecipe(getOperand(0));
2387 }
2388
2389 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2390
2391 /// Generate the extraction of the appropriate bit from the block mask and the
2392 /// conditional branch.
2393 void execute(VPTransformState &State) override;
2394
2395#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2396 /// Print the recipe.
2397 void print(raw_ostream &O, const Twine &Indent,
2398 VPSlotTracker &SlotTracker) const override {
2399 O << Indent << "BRANCH-ON-MASK ";
2400 if (VPValue *Mask = getMask())
2401 Mask->printAsOperand(O, SlotTracker);
2402 else
2403 O << " All-One";
2404 }
2405#endif
2406
2407 /// Return the mask used by this recipe. Note that a full mask is represented
2408 /// by a nullptr.
2409 VPValue *getMask() const {
2410 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2411 // Mask is optional.
2412 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2413 }
2414
2415 /// Returns true if the recipe uses scalars of operand \p Op.
2416 bool usesScalars(const VPValue *Op) const override {
2418 "Op must be an operand of the recipe");
2419 return true;
2420 }
2421};
2422
2423/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2424/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2425/// order to merge values that are set under such a branch and feed their uses.
2426/// The phi nodes can be scalar or vector depending on the users of the value.
2427/// This recipe works in concert with VPBranchOnMaskRecipe.
2429public:
2430 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2431 /// nodes after merging back from a Branch-on-Mask.
2433 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
2434 ~VPPredInstPHIRecipe() override = default;
2435
2437 return new VPPredInstPHIRecipe(getOperand(0));
2438 }
2439
2440 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2441
2442 /// Generates phi nodes for live-outs as needed to retain SSA form.
2443 void execute(VPTransformState &State) override;
2444
2445#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2446 /// Print the recipe.
2447 void print(raw_ostream &O, const Twine &Indent,
2448 VPSlotTracker &SlotTracker) const override;
2449#endif
2450
2451 /// Returns true if the recipe uses scalars of operand \p Op.
2452 bool usesScalars(const VPValue *Op) const override {
2454 "Op must be an operand of the recipe");
2455 return true;
2456 }
2457};
2458
2459/// A common base class for widening memory operations. An optional mask can be
2460/// provided as the last operand.
2462protected:
2464
2465 /// Whether the accessed addresses are consecutive.
2467
2468 /// Whether the consecutive accessed addresses are in reverse order.
2470
2471 /// Whether the memory access is masked.
2472 bool IsMasked = false;
2473
2474 void setMask(VPValue *Mask) {
2475 assert(!IsMasked && "cannot re-set mask");
2476 if (!Mask)
2477 return;
2478 addOperand(Mask);
2479 IsMasked = true;
2480 }
2481
2482 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2483 std::initializer_list<VPValue *> Operands,
2484 bool Consecutive, bool Reverse, DebugLoc DL)
2486 Reverse(Reverse) {
2487 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2488 }
2489
2490public:
2492 llvm_unreachable("cloning not supported");
2493 }
2494
2495 static inline bool classof(const VPRecipeBase *R) {
2496 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2497 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2498 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2499 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2500 }
2501
2502 static inline bool classof(const VPUser *U) {
2503 auto *R = dyn_cast<VPRecipeBase>(U);
2504 return R && classof(R);
2505 }
2506
2507 /// Return whether the loaded-from / stored-to addresses are consecutive.
2508 bool isConsecutive() const { return Consecutive; }
2509
2510 /// Return whether the consecutive loaded/stored addresses are in reverse
2511 /// order.
2512 bool isReverse() const { return Reverse; }
2513
2514 /// Return the address accessed by this recipe.
2515 VPValue *getAddr() const { return getOperand(0); }
2516
2517 /// Returns true if the recipe is masked.
2518 bool isMasked() const { return IsMasked; }
2519
2520 /// Return the mask used by this recipe. Note that a full mask is represented
2521 /// by a nullptr.
2522 VPValue *getMask() const {
2523 // Mask is optional and therefore the last operand.
2524 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2525 }
2526
2527 /// Generate the wide load/store.
2528 void execute(VPTransformState &State) override {
2529 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2530 }
2531
2533};
2534
2535/// A recipe for widening load operations, using the address to load from and an
2536/// optional mask.
2537struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2539 bool Consecutive, bool Reverse, DebugLoc DL)
2540 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2541 Reverse, DL),
2542 VPValue(this, &Load) {
2543 setMask(Mask);
2544 }
2545
2547 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2549 getDebugLoc());
2550 }
2551
2552 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2553
2554 /// Generate a wide load or gather.
2555 void execute(VPTransformState &State) override;
2556
2557#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2558 /// Print the recipe.
2559 void print(raw_ostream &O, const Twine &Indent,
2560 VPSlotTracker &SlotTracker) const override;
2561#endif
2562
2563 /// Returns true if the recipe only uses the first lane of operand \p Op.
2564 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2566 "Op must be an operand of the recipe");
2567 // Widened, consecutive loads operations only demand the first lane of
2568 // their address.
2569 return Op == getAddr() && isConsecutive();
2570 }
2571};
2572
2573/// A recipe for widening load operations with vector-predication intrinsics,
2574/// using the address to load from, the explicit vector length and an optional
2575/// mask.
2576struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
2578 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
2579 {L.getAddr(), &EVL}, L.isConsecutive(),
2580 L.isReverse(), L.getDebugLoc()),
2581 VPValue(this, &getIngredient()) {
2582 setMask(Mask);
2583 }
2584
2585 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
2586
2587 /// Return the EVL operand.
2588 VPValue *getEVL() const { return getOperand(1); }
2589
2590 /// Generate the wide load or gather.
2591 void execute(VPTransformState &State) override;
2592
2593#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2594 /// Print the recipe.
2595 void print(raw_ostream &O, const Twine &Indent,
2596 VPSlotTracker &SlotTracker) const override;
2597#endif
2598
2599 /// Returns true if the recipe only uses the first lane of operand \p Op.
2600 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2602 "Op must be an operand of the recipe");
2603 // Widened loads only demand the first lane of EVL and consecutive loads
2604 // only demand the first lane of their address.
2605 return Op == getEVL() || (Op == getAddr() && isConsecutive());
2606 }
2607};
2608
2609/// A recipe for widening store operations, using the stored value, the address
2610/// to store to and an optional mask.
2613 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2614 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2616 setMask(Mask);
2617 }
2618
2620 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
2622 Reverse, getDebugLoc());
2623 }
2624
2625 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
2626
2627 /// Return the value stored by this recipe.
2628 VPValue *getStoredValue() const { return getOperand(1); }
2629
2630 /// Generate a wide store or scatter.
2631 void execute(VPTransformState &State) override;
2632
2633#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2634 /// Print the recipe.
2635 void print(raw_ostream &O, const Twine &Indent,
2636 VPSlotTracker &SlotTracker) const override;
2637#endif
2638
2639 /// Returns true if the recipe only uses the first lane of operand \p Op.
2640 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2642 "Op must be an operand of the recipe");
2643 // Widened, consecutive stores only demand the first lane of their address,
2644 // unless the same operand is also stored.
2645 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2646 }
2647};
2648
2649/// A recipe for widening store operations with vector-predication intrinsics,
2650/// using the value to store, the address to store to, the explicit vector
2651/// length and an optional mask.
2654 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
2655 {S.getAddr(), S.getStoredValue(), &EVL},
2656 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
2657 setMask(Mask);
2658 }
2659
2660 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
2661
2662 /// Return the value stored by this recipe.
2663 VPValue *getStoredValue() const { return getOperand(1); }
2664
2665 /// Return the EVL operand.
2666 VPValue *getEVL() const { return getOperand(2); }
2667
2668 /// Generate the wide store or scatter.
2669 void execute(VPTransformState &State) override;
2670
2671#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2672 /// Print the recipe.
2673 void print(raw_ostream &O, const Twine &Indent,
2674 VPSlotTracker &SlotTracker) const override;
2675#endif
2676
2677 /// Returns true if the recipe only uses the first lane of operand \p Op.
2678 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2680 "Op must be an operand of the recipe");
2681 if (Op == getEVL()) {
2682 assert(getStoredValue() != Op && "unexpected store of EVL");
2683 return true;
2684 }
2685 // Widened, consecutive memory operations only demand the first lane of
2686 // their address, unless the same operand is also stored. The latter can
2687 // happen with opaque pointers.
2688 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2689 }
2690};
2691
2692/// Recipe to expand a SCEV expression.
2694 const SCEV *Expr;
2695 ScalarEvolution &SE;
2696
2697public:
2699 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
2700
2701 ~VPExpandSCEVRecipe() override = default;
2702
2704 return new VPExpandSCEVRecipe(Expr, SE);
2705 }
2706
2707 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
2708
2709 /// Expand the SCEV expression held by this recipe.
2710 void execute(VPTransformState &State) override;
2711
2712#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2713 /// Print the recipe.
2714 void print(raw_ostream &O, const Twine &Indent,
2715 VPSlotTracker &SlotTracker) const override;
2716#endif
2717
2718 const SCEV *getSCEV() const { return Expr; }
2719};
2720
2721/// Canonical scalar induction phi of the vector loop. Starting at the specified
2722/// start value (either 0 or the resume value when vectorizing the epilogue
2723/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
2724/// canonical induction variable.
2726public:
2728 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
2729
2730 ~VPCanonicalIVPHIRecipe() override = default;
2731
2733 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
2734 R->addOperand(getBackedgeValue());
2735 return R;
2736 }
2737
2738 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
2739
2741 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
2742 }
2743
2744 /// Generate the canonical scalar induction phi of the vector loop.
2745 void execute(VPTransformState &State) override;
2746
2747#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2748 /// Print the recipe.
2749 void print(raw_ostream &O, const Twine &Indent,
2750 VPSlotTracker &SlotTracker) const override;
2751#endif
2752
2753 /// Returns the scalar type of the induction.
2755 return getStartValue()->getLiveInIRValue()->getType();
2756 }
2757
2758 /// Returns true if the recipe only uses the first lane of operand \p Op.
2759 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2761 "Op must be an operand of the recipe");
2762 return true;
2763 }
2764
2765 /// Returns true if the recipe only uses the first part of operand \p Op.
2766 bool onlyFirstPartUsed(const VPValue *Op) const override {
2768 "Op must be an operand of the recipe");
2769 return true;
2770 }
2771
2772 /// Check if the induction described by \p Kind, \p Start and \p Step is
2773 /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
2775 VPValue *Step) const;
2776};
2777
2778/// A recipe for generating the active lane mask for the vector loop that is
2779/// used to predicate the vector operations.
2780/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2781/// remove VPActiveLaneMaskPHIRecipe.
2783public:
2785 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
2786 DL) {}
2787
2788 ~VPActiveLaneMaskPHIRecipe() override = default;
2789
2792 }
2793
2794 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
2795
2797 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
2798 }
2799
2800 /// Generate the active lane mask phi of the vector loop.
2801 void execute(VPTransformState &State) override;
2802
2803#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2804 /// Print the recipe.
2805 void print(raw_ostream &O, const Twine &Indent,
2806 VPSlotTracker &SlotTracker) const override;
2807#endif
2808};
2809
2810/// A recipe for generating the phi node for the current index of elements,
2811/// adjusted in accordance with EVL value. It starts at the start value of the
2812/// canonical induction and gets incremented by EVL in each iteration of the
2813/// vector loop.
2815public:
2817 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
2818
2819 ~VPEVLBasedIVPHIRecipe() override = default;
2820
2822 llvm_unreachable("cloning not implemented yet");
2823 }
2824
2825 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
2826
2828 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
2829 }
2830
2831 /// Generate phi for handling IV based on EVL over iterations correctly.
2832 /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
2833 void execute(VPTransformState &State) override;
2834
2835 /// Returns true if the recipe only uses the first lane of operand \p Op.
2836 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2838 "Op must be an operand of the recipe");
2839 return true;
2840 }
2841
2842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2843 /// Print the recipe.
2844 void print(raw_ostream &O, const Twine &Indent,
2845 VPSlotTracker &SlotTracker) const override;
2846#endif
2847};
2848
2849/// A Recipe for widening the canonical induction variable of the vector loop.
2851public:
2853 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
2854
2855 ~VPWidenCanonicalIVRecipe() override = default;
2856
2858 return new VPWidenCanonicalIVRecipe(
2859 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
2860 }
2861
2862 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
2863
2864 /// Generate a canonical vector induction variable of the vector loop, with
2865 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
2866 /// step = <VF*UF, VF*UF, ..., VF*UF>.
2867 void execute(VPTransformState &State) override;
2868
2869#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2870 /// Print the recipe.
2871 void print(raw_ostream &O, const Twine &Indent,
2872 VPSlotTracker &SlotTracker) const override;
2873#endif
2874};
2875
2876 /// A recipe for converting the input value \p IV to the corresponding
2877/// value of an IV with different start and step values, using Start + IV *
2878/// Step.
2880 /// Kind of the induction.
2882 /// If not nullptr, the floating point induction binary operator. Must be set
2883 /// for floating point inductions.
2884 const FPMathOperator *FPBinOp;
2885
2886public:
2888 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
2890 IndDesc.getKind(),
2891 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
2892 Start, CanonicalIV, Step) {}
2893
2895 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
2896 VPValue *Step)
2897 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
2898 FPBinOp(FPBinOp) {}
2899
2900 ~VPDerivedIVRecipe() override = default;
2901
2903 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
2904 getStepValue());
2905 }
2906
2907 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
2908
2909 /// Generate the transformed value of the induction at offset StartValue (1.
2910 /// operand) + IV (2. operand) * StepValue (3. operand).
2911 void execute(VPTransformState &State) override;
2912
2913#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2914 /// Print the recipe.
2915 void print(raw_ostream &O, const Twine &Indent,
2916 VPSlotTracker &SlotTracker) const override;
2917#endif
2918
2920 return getStartValue()->getLiveInIRValue()->getType();
2921 }
2922
2923 VPValue *getStartValue() const { return getOperand(0); }
2924 VPValue *getStepValue() const { return getOperand(2); }
2925
2926 /// Returns true if the recipe only uses the first lane of operand \p Op.
2927 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2929 "Op must be an operand of the recipe");
2930 return true;
2931 }
2932};
2933
2934/// A recipe for handling phi nodes of integer and floating-point inductions,
2935/// producing their scalar values.
2937 Instruction::BinaryOps InductionOpcode;
2938
2939public:
2942 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
2943 ArrayRef<VPValue *>({IV, Step}), FMFs),
2944 InductionOpcode(Opcode) {}
2945
2947 VPValue *Step)
2949 IV, Step, IndDesc.getInductionOpcode(),
2950 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
2951 ? IndDesc.getInductionBinOp()->getFastMathFlags()
2952 : FastMathFlags()) {}
2953
2954 ~VPScalarIVStepsRecipe() override = default;
2955
2957 return new VPScalarIVStepsRecipe(
2958 getOperand(0), getOperand(1), InductionOpcode,
2960 }
2961
2962 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
2963
2964 /// Generate the scalarized versions of the phi node as needed by their users.
2965 void execute(VPTransformState &State) override;
2966
2967#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2968 /// Print the recipe.
2969 void print(raw_ostream &O, const Twine &Indent,
2970 VPSlotTracker &SlotTracker) const override;
2971#endif
2972
2973 VPValue *getStepValue() const { return getOperand(1); }
2974
2975 /// Returns true if the recipe only uses the first lane of operand \p Op.
2976 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2978 "Op must be an operand of the recipe");
2979 return true;
2980 }
2981};
2982
2983/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
2984/// holds a sequence of zero or more VPRecipe's each representing a sequence of
2985/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
2987public:
2989
2990protected:
2991 /// The VPRecipes held in the order of output instructions to generate.
2993
2994 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
2995 : VPBlockBase(BlockSC, Name.str()) {}
2996
2997public:
2998 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
2999 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3000 if (Recipe)
3001 appendRecipe(Recipe);
3002 }
3003
3004 ~VPBasicBlock() override {
3005 while (!Recipes.empty())
3006 Recipes.pop_back();
3007 }
3008
3009 /// Instruction iterators...
3014
3015 //===--------------------------------------------------------------------===//
3016 /// Recipe iterator methods
3017 ///
3018 inline iterator begin() { return Recipes.begin(); }
3019 inline const_iterator begin() const { return Recipes.begin(); }
3020 inline iterator end() { return Recipes.end(); }
3021 inline const_iterator end() const { return Recipes.end(); }
3022
3023 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3024 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3025 inline reverse_iterator rend() { return Recipes.rend(); }
3026 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3027
3028 inline size_t size() const { return Recipes.size(); }
3029 inline bool empty() const { return Recipes.empty(); }
3030 inline const VPRecipeBase &front() const { return Recipes.front(); }
3031 inline VPRecipeBase &front() { return Recipes.front(); }
3032 inline const VPRecipeBase &back() const { return Recipes.back(); }
3033 inline VPRecipeBase &back() { return Recipes.back(); }
3034
3035 /// Returns a reference to the list of recipes.
3037
3038 /// Returns a pointer to a member of the recipe list.
3040 return &VPBasicBlock::Recipes;
3041 }
3042
3043 /// Method to support type inquiry through isa, cast, and dyn_cast.
3044 static inline bool classof(const VPBlockBase *V) {
3045 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3046 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3047 }
3048
3049 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3050 assert(Recipe && "No recipe to append.");
3051 assert(!Recipe->Parent && "Recipe already in VPlan");
3052 Recipe->Parent = this;
3053 Recipes.insert(InsertPt, Recipe);
3054 }
3055
3056 /// Augment the existing recipes of a VPBasicBlock with an additional
3057 /// \p Recipe as the last recipe.
3058 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3059
3060 /// The method which generates the output IR instructions that correspond to
3061 /// this VPBasicBlock, thereby "executing" the VPlan.
3062 void execute(VPTransformState *State) override;
3063
3064 /// Return the cost of this VPBasicBlock.
3066
3067 /// Return the position of the first non-phi node recipe in the block.
3069
3070 /// Returns an iterator range over the PHI-like recipes in the block.
3072 return make_range(begin(), getFirstNonPhi());
3073 }
3074
3075 void dropAllReferences(VPValue *NewValue) override;
3076
3077 /// Split current block at \p SplitAt by inserting a new block between the
3078 /// current block and its successors and moving all recipes starting at
3079 /// SplitAt to the new block. Returns the new block.
3080 VPBasicBlock *splitAt(iterator SplitAt);
3081
3083
3084#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3085 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3086 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3087 ///
3088 /// Note that the numbering is applied to the whole VPlan, so printing
3089 /// individual blocks is consistent with the whole VPlan printing.
3090 void print(raw_ostream &O, const Twine &Indent,
3091 VPSlotTracker &SlotTracker) const override;
3092 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3093#endif
3094
3095 /// If the block has multiple successors, return the branch recipe terminating
3096 /// the block. If there are no or only a single successor, return nullptr;
3098 const VPRecipeBase *getTerminator() const;
3099
3100 /// Returns true if the block is exiting its parent region.
3101 bool isExiting() const;
3102
3103 /// Clone the current block and it's recipes, without updating the operands of
3104 /// the cloned recipes.
3105 VPBasicBlock *clone() override {
3106 auto *NewBlock = new VPBasicBlock(getName());
3107 for (VPRecipeBase &R : *this)
3108 NewBlock->appendRecipe(R.clone());
3109 return NewBlock;
3110 }
3111
3112protected:
3113 /// Execute the recipes in the IR basic block \p BB.
3114 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3115
3116private:
3117 /// Create an IR BasicBlock to hold the output instructions generated by this
3118 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3119 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3120};
3121
3122/// A special type of VPBasicBlock that wraps an existing IR basic block.
3123/// Recipes of the block get added before the first non-phi instruction in the
3124/// wrapped block.
3125/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3126/// preheader block.
3128 BasicBlock *IRBB;
3129
3130public:
3132 : VPBasicBlock(VPIRBasicBlockSC,
3133 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3134 IRBB(IRBB) {}
3135
3136 ~VPIRBasicBlock() override {}
3137
3138 static inline bool classof(const VPBlockBase *V) {
3139 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3140 }
3141
3142 /// The method which generates the output IR instructions that correspond to
3143 /// this VPBasicBlock, thereby "executing" the VPlan.
3144 void execute(VPTransformState *State) override;
3145
3146 VPIRBasicBlock *clone() override {
3147 auto *NewBlock = new VPIRBasicBlock(IRBB);
3148 for (VPRecipeBase &R : Recipes)
3149 NewBlock->appendRecipe(R.clone());
3150 return NewBlock;
3151 }
3152
3153 BasicBlock *getIRBasicBlock() const { return IRBB; }
3154};
3155
3156/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3157/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3158/// A VPRegionBlock may indicate that its contents are to be replicated several
3159/// times. This is designed to support predicated scalarization, in which a
3160/// scalar if-then code structure needs to be generated VF * UF times. Having
3161/// this replication indicator helps to keep a single model for multiple
3162/// candidate VF's. The actual replication takes place only once the desired VF
3163/// and UF have been determined.
3165 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3166 VPBlockBase *Entry;
3167
3168 /// Hold the Single Exiting block of the SESE region modelled by the
3169 /// VPRegionBlock.
3170 VPBlockBase *Exiting;
3171
3172 /// An indicator whether this region is to generate multiple replicated
3173 /// instances of output IR corresponding to its VPBlockBases.
3174 bool IsReplicator;
3175
3176public:
3178 const std::string &Name = "", bool IsReplicator = false)
3179 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3180 IsReplicator(IsReplicator) {
3181 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3182 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3183 Entry->setParent(this);
3184 Exiting->setParent(this);
3185 }
3186 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3187 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3188 IsReplicator(IsReplicator) {}
3189
3190 ~VPRegionBlock() override {
3191 if (Entry) {
3192 VPValue DummyValue;
3193 Entry->dropAllReferences(&DummyValue);
3194 deleteCFG(Entry);
3195 }
3196 }
3197
3198 /// Method to support type inquiry through isa, cast, and dyn_cast.
3199 static inline bool classof(const VPBlockBase *V) {
3200 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3201 }
3202
3203 const VPBlockBase *getEntry() const { return Entry; }
3204 VPBlockBase *getEntry() { return Entry; }
3205
3206 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3207 /// EntryBlock must have no predecessors.
3208 void setEntry(VPBlockBase *EntryBlock) {
3209 assert(EntryBlock->getPredecessors().empty() &&
3210 "Entry block cannot have predecessors.");
3211 Entry = EntryBlock;
3212 EntryBlock->setParent(this);
3213 }
3214
3215 const VPBlockBase *getExiting() const { return Exiting; }
3216 VPBlockBase *getExiting() { return Exiting; }
3217
3218 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3219 /// ExitingBlock must have no successors.
3220 void setExiting(VPBlockBase *ExitingBlock) {
3221 assert(ExitingBlock->getSuccessors().empty() &&
3222 "Exit block cannot have successors.");
3223 Exiting = ExitingBlock;
3224 ExitingBlock->setParent(this);
3225 }
3226
3227 /// Returns the pre-header VPBasicBlock of the loop region.
3229 assert(!isReplicator() && "should only get pre-header of loop regions");
3231 }
3232
3233 /// An indicator whether this region is to generate multiple replicated
3234 /// instances of output IR corresponding to its VPBlockBases.
3235 bool isReplicator() const { return IsReplicator; }
3236
3237 /// The method which generates the output IR instructions that correspond to
3238 /// this VPRegionBlock, thereby "executing" the VPlan.
3239 void execute(VPTransformState *State) override;
3240
3241 // Return the cost of this region.
3243
3244 void dropAllReferences(VPValue *NewValue) override;
3245
3246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3247 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3248 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3249 /// consecutive numbers.
3250 ///
3251 /// Note that the numbering is applied to the whole VPlan, so printing
3252 /// individual regions is consistent with the whole VPlan printing.
3253 void print(raw_ostream &O, const Twine &Indent,
3254 VPSlotTracker &SlotTracker) const override;
3255 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3256#endif
3257
3258 /// Clone all blocks in the single-entry single-exit region of the block and
3259 /// their recipes without updating the operands of the cloned recipes.
3260 VPRegionBlock *clone() override;
3261};
3262
3263/// VPlan models a candidate for vectorization, encoding various decisions taken
3264/// to produce efficient output IR, including which branches, basic-blocks and
3265/// output IR instructions to generate, and their cost. VPlan holds a
3266/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3267/// VPBasicBlock.
3268class VPlan {
3269 friend class VPlanPrinter;
3270 friend class VPSlotTracker;
3271
3272 /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
3273 /// preheader of the vector loop.
3274 VPBasicBlock *Entry;
3275
3276 /// VPBasicBlock corresponding to the original preheader. Used to place
3277 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3278 /// rest of VPlan execution.
3279 VPBasicBlock *Preheader;
3280
3281 /// Holds the VFs applicable to this VPlan.
3283
3284 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3285 /// any UF.
3287
3288 /// Holds the name of the VPlan, for printing.
3289 std::string Name;
3290
3291 /// Represents the trip count of the original loop, for folding
3292 /// the tail.
3293 VPValue *TripCount = nullptr;
3294
3295 /// Represents the backedge taken count of the original loop, for folding
3296 /// the tail. It equals TripCount - 1.
3297 VPValue *BackedgeTakenCount = nullptr;
3298
3299 /// Represents the vector trip count.
3300 VPValue VectorTripCount;
3301
3302 /// Represents the loop-invariant VF * UF of the vector loop region.
3303 VPValue VFxUF;
3304
3305 /// Holds a mapping between Values and their corresponding VPValue inside
3306 /// VPlan.
3307 Value2VPValueTy Value2VPValue;
3308
3309 /// Contains all the external definitions created for this VPlan. External
3310 /// definitions are VPValues that hold a pointer to their underlying IR.
3311 SmallVector<VPValue *, 16> VPLiveInsToFree;
3312
3313 /// Values used outside the plan. It contains live-outs that need fixing. Any
3314 /// live-out that is fixed outside VPlan needs to be removed. The remaining
3315 /// live-outs are fixed via VPLiveOut::fixPhi.
3317
3318 /// Mapping from SCEVs to the VPValues representing their expansions.
3319 /// NOTE: This mapping is temporary and will be removed once all users have
3320 /// been modeled in VPlan directly.
3321 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3322
3323public:
3324 /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
3325 /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
3326 /// be disconnected, as the bypass blocks between them are not yet modeled in
3327 /// VPlan.
3328 VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3329 : VPlan(Preheader, Entry) {
3330 TripCount = TC;
3331 }
3332
3333 /// Construct a VPlan with original preheader \p Preheader and \p Entry to
3334 /// the plan. At the moment, \p Preheader and \p Entry need to be
3335 /// disconnected, as the bypass blocks between them are not yet modeled in
3336 /// VPlan.
3337 VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3338 : Entry(Entry), Preheader(Preheader) {
3339 Entry->setPlan(this);
3340 Preheader->setPlan(this);
3341 assert(Preheader->getNumSuccessors() == 0 &&
3342 Preheader->getNumPredecessors() == 0 &&
3343 "preheader must be disconnected");
3344 }
3345
3346 ~VPlan();
3347
3348 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3349 /// original scalar pre-header) which contains SCEV expansions that need
3350 /// to happen before the CFG is modified; a VPBasicBlock for the vector
3351 /// pre-header, followed by a region for the vector loop, followed by the
3352 /// middle VPBasicBlock. If a check is needed to guard executing the scalar
3353 /// epilogue loop, it will be added to the middle block, together with
3354 /// VPBasicBlocks for the scalar preheader and exit blocks.
3355 static VPlanPtr createInitialVPlan(const SCEV *TripCount,
3356 ScalarEvolution &PSE,
3357 bool RequiresScalarEpilogueCheck,
3358 bool TailFolded, Loop *TheLoop);
3359
3360 /// Prepare the plan for execution, setting up the required live-in values.
3361 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3362 Value *CanonicalIVStartValue, VPTransformState &State);
3363
3364 /// Generate the IR code for this VPlan.
3365 void execute(VPTransformState *State);
3366
3367 /// Return the cost of this plan.
3369
3370 VPBasicBlock *getEntry() { return Entry; }
3371 const VPBasicBlock *getEntry() const { return Entry; }
3372
3373 /// The trip count of the original loop.
3375 assert(TripCount && "trip count needs to be set before accessing it");
3376 return TripCount;
3377 }
3378
3379 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3380 /// the original trip count have been replaced.
3381 void resetTripCount(VPValue *NewTripCount) {
3382 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3383 "TripCount always must be set");
3384 TripCount = NewTripCount;
3385 }
3386
3387 /// The backedge taken count of the original loop.
3389 if (!BackedgeTakenCount)
3390 BackedgeTakenCount = new VPValue();
3391 return BackedgeTakenCount;
3392 }
3393
3394 /// The vector trip count.
3395 VPValue &getVectorTripCount() { return VectorTripCount; }
3396
3397 /// Returns VF * UF of the vector loop region.
3398 VPValue &getVFxUF() { return VFxUF; }
3399
3400 void addVF(ElementCount VF) { VFs.insert(VF); }
3401
3403 assert(hasVF(VF) && "Cannot set VF not already in plan");
3404 VFs.clear();
3405 VFs.insert(VF);
3406 }
3407
3408 bool hasVF(ElementCount VF) { return VFs.count(VF); }
3410 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3411 }
3412
3413 /// Returns an iterator range over all VFs of the plan.
3416 return {VFs.begin(), VFs.end()};
3417 }
3418
3419 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3420
3421 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3422
3423 void setUF(unsigned UF) {
3424 assert(hasUF(UF) && "Cannot set the UF not already in plan");
3425 UFs.clear();
3426 UFs.insert(UF);
3427 }
3428
3429 /// Return a string with the name of the plan and the applicable VFs and UFs.
3430 std::string getName() const;
3431
3432 void setName(const Twine &newName) { Name = newName.str(); }
3433
3434 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3435 /// yet) for \p V.
3437 assert(V && "Trying to get or add the VPValue of a null Value");
3438 if (!Value2VPValue.count(V)) {
3439 VPValue *VPV = new VPValue(V);
3440 VPLiveInsToFree.push_back(VPV);
3441 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3442 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3443 Value2VPValue[V] = VPV;
3444 }
3445
3446 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3447 assert(Value2VPValue[V]->isLiveIn() &&
3448 "Only live-ins should be in mapping");
3449 return Value2VPValue[V];
3450 }
3451
3452 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3453 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3454
3455#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3456 /// Print the live-ins of this VPlan to \p O.
3457 void printLiveIns(raw_ostream &O) const;
3458
3459 /// Print this VPlan to \p O.
3460 void print(raw_ostream &O) const;
3461
3462 /// Print this VPlan in DOT format to \p O.
3463 void printDOT(raw_ostream &O) const;
3464
3465 /// Dump the plan to stderr (for debugging).
3466 LLVM_DUMP_METHOD void dump() const;
3467#endif
3468
3469 /// Returns the VPRegionBlock of the vector loop.
3471 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3472 }
3474 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3475 }
3476
3477 /// Returns the canonical induction recipe of the vector loop.
3480 if (EntryVPBB->empty()) {
3481 // VPlan native path.
3482 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
3483 }
3484 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
3485 }
3486
3487 void addLiveOut(PHINode *PN, VPValue *V);
3488
3490 return LiveOuts;
3491 }
3492
3493 VPValue *getSCEVExpansion(const SCEV *S) const {
3494 return SCEVToExpansion.lookup(S);
3495 }
3496
3497 void addSCEVExpansion(const SCEV *S, VPValue *V) {
3498 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
3499 SCEVToExpansion[S] = V;
3500 }
3501
3502 /// \return The block corresponding to the original preheader.
3503 VPBasicBlock *getPreheader() { return Preheader; }
3504 const VPBasicBlock *getPreheader() const { return Preheader; }
3505
3506 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
3507 /// recipes to refer to the clones, and return it.
3508 VPlan *duplicate();
3509};
3510
3511#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3512/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
3513/// indented and follows the dot format.
3515 raw_ostream &OS;
3516 const VPlan &Plan;
3517 unsigned Depth = 0;
3518 unsigned TabWidth = 2;
3519 std::string Indent;
3520 unsigned BID = 0;
3522
3524
3525 /// Handle indentation.
3526 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
3527
3528 /// Print a given \p Block of the Plan.
3529 void dumpBlock(const VPBlockBase *Block);
3530
3531 /// Print the information related to the CFG edges going out of a given
3532 /// \p Block, followed by printing the successor blocks themselves.
3533 void dumpEdges(const VPBlockBase *Block);
3534
3535 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
3536 /// its successor blocks.
3537 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
3538
3539 /// Print a given \p Region of the Plan.
3540 void dumpRegion(const VPRegionBlock *Region);
3541
3542 unsigned getOrCreateBID(const VPBlockBase *Block) {
3543 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
3544 }
3545
3546 Twine getOrCreateName(const VPBlockBase *Block);
3547
3548 Twine getUID(const VPBlockBase *Block);
3549
3550 /// Print the information related to a CFG edge between two VPBlockBases.
3551 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
3552 const Twine &Label);
3553
3554public:
3556 : OS(O), Plan(P), SlotTracker(&P) {}
3557
3558 LLVM_DUMP_METHOD void dump();
3559};
3560
3562 const Value *V;
3563
3564 VPlanIngredient(const Value *V) : V(V) {}
3565
3566 void print(raw_ostream &O) const;
3567};
3568
3570 I.print(OS);
3571 return OS;
3572}
3573
3575 Plan.print(OS);
3576 return OS;
3577}
3578#endif
3579
3580//===----------------------------------------------------------------------===//
3581// VPlan Utilities
3582//===----------------------------------------------------------------------===//
3583
3584/// Class that provides utilities for VPBlockBases in VPlan.
3586public:
3587 VPBlockUtils() = delete;
3588
3589 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
3590 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
3591 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
3592 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
3593 /// have neither successors nor predecessors.
3594 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
3595 assert(NewBlock->getSuccessors().empty() &&
3596 NewBlock->getPredecessors().empty() &&
3597 "Can't insert new block with predecessors or successors.");
3598 NewBlock->setParent(BlockPtr->getParent());
3599 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
3600 for (VPBlockBase *Succ : Succs) {
3601 disconnectBlocks(BlockPtr, Succ);
3602 connectBlocks(NewBlock, Succ);
3603 }
3604 connectBlocks(BlockPtr, NewBlock);
3605 }
3606
3607 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
3608 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
3609 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
3610 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
3611 /// and \p IfTrue and \p IfFalse must have neither successors nor
3612 /// predecessors.
3613 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
3614 VPBlockBase *BlockPtr) {
3615 assert(IfTrue->getSuccessors().empty() &&
3616 "Can't insert IfTrue with successors.");
3617 assert(IfFalse->getSuccessors().empty() &&
3618 "Can't insert IfFalse with successors.");
3619 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
3620 IfTrue->setPredecessors({BlockPtr});
3621 IfFalse->setPredecessors({BlockPtr});
3622 IfTrue->setParent(BlockPtr->getParent());
3623 IfFalse->setParent(BlockPtr->getParent());
3624 }
3625
3626 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
3627 /// the successors of \p From and \p From to the predecessors of \p To. Both
3628 /// VPBlockBases must have the same parent, which can be null. Both
3629 /// VPBlockBases can be already connected to other VPBlockBases.
3631 assert((From->getParent() == To->getParent()) &&
3632 "Can't connect two block with different parents");
3633 assert(From->getNumSuccessors() < 2 &&
3634 "Blocks can't have more than two successors.");
3635 From->appendSuccessor(To);
3636 To->appendPredecessor(From);
3637 }
3638
3639 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
3640 /// from the successors of \p From and \p From from the predecessors of \p To.
3642 assert(To && "Successor to disconnect is null.");
3643 From->removeSuccessor(To);
3644 To->removePredecessor(From);
3645 }
3646
3647 /// Return an iterator range over \p Range which only includes \p BlockTy
3648 /// blocks. The accesses are casted to \p BlockTy.
3649 template <typename BlockTy, typename T>
3650 static auto blocksOnly(const T &Range) {
3651 // Create BaseTy with correct const-ness based on BlockTy.
3652 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
3653 const VPBlockBase, VPBlockBase>;
3654
3655 // We need to first create an iterator range over (const) BlockTy & instead
3656 // of (const) BlockTy * for filter_range to work properly.
3657 auto Mapped =
3658 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
3660 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
3661 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
3662 return cast<BlockTy>(&Block);
3663 });
3664 }
3665};
3666
3669 InterleaveGroupMap;
3670
3671 /// Type for mapping of instruction based interleave groups to VPInstruction
3672 /// interleave groups
3675
3676 /// Recursively traverse \p Region and populate VPlan based interleave groups
3677 /// based on \p IAI.
3678 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
3680 /// Recursively traverse \p Block and populate VPlan based interleave groups
3681 /// based on \p IAI.
3682 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
3684
3685public:
3687
3690 // Avoid releasing a pointer twice.
3691 for (auto &I : InterleaveGroupMap)
3692 DelSet.insert(I.second);
3693 for (auto *Ptr : DelSet)
3694 delete Ptr;
3695 }
3696
3697 /// Get the interleave group that \p Instr belongs to.
3698 ///
3699 /// \returns nullptr if doesn't have such group.
3702 return InterleaveGroupMap.lookup(Instr);
3703 }
3704};
3705
3706/// Class that maps (parts of) an existing VPlan to trees of combined
3707/// VPInstructions.
3709 enum class OpMode { Failed, Load, Opcode };
3710
3711 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
3712 /// DenseMap keys.
3713 struct BundleDenseMapInfo {
3714 static SmallVector<VPValue *, 4> getEmptyKey() {
3715 return {reinterpret_cast<VPValue *>(-1)};
3716 }
3717
3718 static SmallVector<VPValue *, 4> getTombstoneKey() {
3719 return {reinterpret_cast<VPValue *>(-2)};
3720 }
3721
3722 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
3723 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
3724 }
3725
3726 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
3728 return LHS == RHS;
3729 }
3730 };
3731
3732 /// Mapping of values in the original VPlan to a combined VPInstruction.
3734 BundleToCombined;
3735
3737
3738 /// Basic block to operate on. For now, only instructions in a single BB are
3739 /// considered.
3740 const VPBasicBlock &BB;
3741
3742 /// Indicates whether we managed to combine all visited instructions or not.
3743 bool CompletelySLP = true;
3744
3745 /// Width of the widest combined bundle in bits.
3746 unsigned WidestBundleBits = 0;
3747
3748 using MultiNodeOpTy =
3749 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
3750
3751 // Input operand bundles for the current multi node. Each multi node operand
3752 // bundle contains values not matching the multi node's opcode. They will
3753 // be reordered in reorderMultiNodeOps, once we completed building a
3754 // multi node.
3755 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
3756
3757 /// Indicates whether we are building a multi node currently.
3758 bool MultiNodeActive = false;
3759
3760 /// Check if we can vectorize Operands together.
3761 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
3762
3763 /// Add combined instruction \p New for the bundle \p Operands.
3764 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
3765
3766 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
3767 VPInstruction *markFailed();
3768
3769 /// Reorder operands in the multi node to maximize sequential memory access
3770 /// and commutative operations.
3771 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
3772
3773 /// Choose the best candidate to use for the lane after \p Last. The set of
3774 /// candidates to choose from are values with an opcode matching \p Last's
3775 /// or loads consecutive to \p Last.
3776 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
3777 SmallPtrSetImpl<VPValue *> &Candidates,
3779
3780#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3781 /// Print bundle \p Values to dbgs().
3782 void dumpBundle(ArrayRef<VPValue *> Values);
3783#endif
3784
3785public:
3786 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
3787
3788 ~VPlanSlp() = default;
3789
3790 /// Tries to build an SLP tree rooted at \p Operands and returns a
3791 /// VPInstruction combining \p Operands, if they can be combined.
3793
3794 /// Return the width of the widest combined bundle in bits.
3795 unsigned getWidestBundleBits() const { return WidestBundleBits; }
3796
3797 /// Return true if all visited instructions can be combined.
3798 bool isCompletelySLP() const { return CompletelySLP; }
3799};
3800
3801namespace vputils {
3802
3803/// Returns true if only the first lane of \p Def is used.
3804bool onlyFirstLaneUsed(const VPValue *Def);
3805
3806/// Returns true if only the first part of \p Def is used.
3807bool onlyFirstPartUsed(const VPValue *Def);
3808
3809/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
3810/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
3811/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
3812/// pre-header already contains a recipe expanding \p Expr, return it. If not,
3813/// create a new one.
3815 ScalarEvolution &SE);
3816
3817/// Returns true if \p VPV is uniform after vectorization.
3818inline bool isUniformAfterVectorization(const VPValue *VPV) {
3819 // A value defined outside the vector region must be uniform after
3820 // vectorization inside a vector region.
3822 return true;
3823 const VPRecipeBase *Def = VPV->getDefiningRecipe();
3824 assert(Def && "Must have definition for value defined inside vector region");
3825 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
3826 return Rep->isUniform();
3827 if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
3828 return all_of(GEP->operands(), isUniformAfterVectorization);
3829 if (auto *VPI = dyn_cast<VPInstruction>(Def))
3830 return VPI->isSingleScalar() || VPI->isVectorToScalar();
3831 return false;
3832}
3833
3834/// Return true if \p V is a header mask in \p Plan.
3835bool isHeaderMask(const VPValue *V, VPlan &Plan);
3836} // end namespace vputils
3837
3838} // end namespace llvm
3839
3840#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:391
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:533
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1309
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This file implements a map that provides insertion order iteration.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:873
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:694
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:146
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:468
uint32_t getFactor() const
Definition: VectorUtils.h:484
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:538
InstTy * getInsertPos() const
Definition: VectorUtils.h:554
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:610
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
The RecurrenceDescriptor is used to identify recurrence variables in a loop.
Definition: IVDescriptors.h:70
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:697
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:290
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:124
ElementCount operator*() const
Definition: VPlan.h:132
iterator & operator++()
Definition: VPlan.h:134
iterator(ElementCount VF)
Definition: VPlan.h:128
bool operator==(const iterator &Other) const
Definition: VPlan.h:130
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:2782
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2790
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2796
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:2784
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2986
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3011
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3058
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3105
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3013
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3010
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:488
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3036
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:2994
iterator end()
Definition: VPlan.h:3020
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:2998
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3018
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3012
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3071
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:785
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:217
~VPBasicBlock() override
Definition: VPlan.h:3004
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:580
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:532
const_reverse_iterator rbegin() const
Definition: VPlan.h:3024
reverse_iterator rend()
Definition: VPlan.h:3025
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:555
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:2992
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:542
VPRecipeBase & back()
Definition: VPlan.h:3033
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:647
const VPRecipeBase & front() const
Definition: VPlan.h:3030
const_iterator begin() const
Definition: VPlan.h:3019
VPRecipeBase & front()
Definition: VPlan.h:3031
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:630
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:618
const VPRecipeBase & back() const
Definition: VPlan.h:3032
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3049
bool empty() const
Definition: VPlan.h:3029
const_iterator end() const
Definition: VPlan.h:3021
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3044
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3039
reverse_iterator rbegin()
Definition: VPlan.h:3023
size_t size() const
Definition: VPlan.h:3028
const_reverse_iterator rend() const
Definition: VPlan.h:3026
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2041
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2046
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2084
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2064
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2069
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:2061
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2052
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:437
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:632
VPRegionBlock * getParent()
Definition: VPlan.h:509
VPBlocksTy & getPredecessors()
Definition: VPlan.h:540
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:182
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:690
void setName(const Twine &newName)
Definition: VPlan.h:502
size_t getNumSuccessors() const
Definition: VPlan.h:554
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:537
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:635
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:655
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:680
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:590
size_t getNumPredecessors() const
Definition: VPlan.h:555
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:623
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:204
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:539
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:212
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:494
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:155
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:174
const VPRegionBlock * getParent() const
Definition: VPlan.h:510
void printAsOperand(raw_ostream &OS, bool PrintType) const
Definition: VPlan.h:666
const std::string & getName() const
Definition: VPlan.h:500
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:642
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:580
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:614
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:550
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:574
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:639
unsigned getVPBlockID() const
Definition: VPlan.h:507
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:486
VPBlocksTy & getSuccessors()
Definition: VPlan.h:535
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:196
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:160
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:603
void setParent(VPRegionBlock *P)
Definition: VPlan.h:520
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:596
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:544
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:534
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:3585
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:3650
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:3594
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:3613
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3641
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3630
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2377
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2409
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2397
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2379
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2385
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2416
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:2725
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:2766
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2740
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2732
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:2727
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2759
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2754
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:307
unsigned getVPDefID() const
Definition: VPlanValue.h:428
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:2879
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2894
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2924
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
Definition: VPlan.h:2887
Type * getScalarType() const
Definition: VPlan.h:2919
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2902
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2927
VPValue * getStartValue() const
Definition: VPlan.h:2923
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:2814
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2827
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2821
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:2816
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2836
Recipe to expand a SCEV expression.
Definition: VPlan.h:2693
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:2698
const SCEV * getSCEV() const
Definition: VPlan.h:2718
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2703
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1727
static bool classof(const VPValue *V)
Definition: VPlan.h:1744
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:1729
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1771
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1760
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:1768
VPValue * getStartValue() const
Definition: VPlan.h:1763
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1740
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:1777
~VPHeaderPHIRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3127
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:457
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:3131
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3153
~VPIRBasicBlock() override
Definition: VPlan.h:3136
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3138
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3146
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1233
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1308
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1338
bool hasResult() const
Definition: VPlan.h:1369
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1345
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1320
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1313
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1325
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1251
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1239
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1254
@ CalculateTripCountMinusVF
Definition: VPlan.h:1252
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: VPlan.h:1362
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2098
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2177
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2139
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2110
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2145
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2131
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2152
Instruction * getInsertPos() const
Definition: VPlan.h:2183
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2168
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2172
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:3701
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:156
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:196
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:231
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:75
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:178
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:212
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:182
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:215
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:202
static VPLane getFirstLane()
Definition: VPlan.h:180
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:159
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:218
A value that is used outside the VPlan.
Definition: VPlan.h:704
VPLiveOut(PHINode *Phi, VPValue *Op)
Definition: VPlan.h:708
static bool classof(const VPUser *U)
Definition: VPlan.h:711
bool usesScalars(const VPValue *Op) const override
Returns true if the VPLiveOut uses scalars of operand Op.
Definition: VPlan.h:722
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:728
void fixPhi(VPlan &Plan, VPTransformState &State)
Fix the wrapped phi node.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2428
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2452
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2432
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2436
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:766
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:857
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:791
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:862
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:833
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:777
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:792
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:838
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:782
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:846
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:968
ExactFlagsTy ExactFlags
Definition: VPlan.h:1024
FastMathFlagsTy FMFs
Definition: VPlan.h:1027
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPFlagsTy GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1101
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:1026
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:1021
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1076
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1153
bool isInBounds() const
Definition: VPlan.h:1195
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1107
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1088
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1122
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1202
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:1046
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:1023
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1082
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1094
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:1032
WrapFlagsTy WrapFlags
Definition: VPlan.h:1022
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1206
bool isDisjoint() const
Definition: VPlan.h:1218
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1189
bool hasNoSignedWrap() const
Definition: VPlan.h:1212
static bool classof(const VPUser *U)
Definition: VPlan.h:1116
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:1039
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2263
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2293
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2290
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
Definition: VPlan.h:2265
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2274
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:1982
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:1995
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2033
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2005
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2036
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2015
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2028
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2189
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2248
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2222
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2252
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2242
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2208
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2254
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2197
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2246
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2250
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2217
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2227
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3164
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:718
const VPBlockBase * getEntry() const
Definition: VPlan.h:3203
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3235
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:727
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3220
VPBlockBase * getExiting()
Definition: VPlan.h:3216
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3208
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:792
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:828
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3186
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3177
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:734
const VPBlockBase * getExiting() const
Definition: VPlan.h:3215
VPBlockBase * getEntry()
Definition: VPlan.h:3204
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3228
~VPRegionBlock() override
Definition: VPlan.h:3190
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3199
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2304
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2349
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2356
bool isUniform() const
Definition: VPlan.h:2344
bool isPredicated() const
Definition: VPlan.h:2346
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2323
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2313
unsigned getOpcode() const
Definition: VPlan.h:2373
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2368
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1495
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1509
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1525
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1523
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1503
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:2936
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2976
VPValue * getStepValue() const
Definition: VPlan.h:2973
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2946
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2956
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:2940
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:895
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:901
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:959
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:910
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:962
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:898
static bool classof(const VPUser *U)
Definition: VPlan.h:951
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:906
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:449
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:39
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:202
operand_range operands()
Definition: VPlanValue.h:272
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:257
unsigned getNumOperands() const
Definition: VPlanValue.h:251
operand_iterator op_begin()
Definition: VPlanValue.h:268
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:252
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:246
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:125
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:132
bool isDefinedOutsideVectorRegions() const
Returns true if the VPValue is defined outside any vector regions, i.e.
Definition: VPlanValue.h:186
A recipe to compute the pointers for widened memory accesses of IndexedTy for all parts.
Definition: VPlan.h:1663
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, bool IsInBounds, DebugLoc DL)
Definition: VPlan.h:1668
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1685
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1678
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1692
A recipe for widening Call instructions.
Definition: VPlan.h:1534
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1575
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1558
VPWidenCallRecipe(Value *UV, iterator_range< IterT > CallArguments, Intrinsic::ID VectorIntrinsicID, DebugLoc DL={}, Function *Variant=nullptr)
Definition: VPlan.h:1546
Function * getCalledScalarFunction() const
Definition: VPlan.h:1568
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1572
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:2850
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2857
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:2852
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1447
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1455
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1488
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1491
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1463
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1469
A recipe for handling GEP instructions.
Definition: VPlan.h:1621
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1643
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1638
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:1784
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc)
Definition: VPlan.h:1797
const TruncInst * getTruncInst() const
Definition: VPlan.h:1845
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:1831
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1807
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1844
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1839
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc)
Definition: VPlan.h:1790
const VPValue * getStepValue() const
Definition: VPlan.h:1840
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1858
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1824
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1850
A common base class for widening memory operations.
Definition: VPlan.h:2461
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2472
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2469
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2508
static bool classof(const VPUser *U)
Definition: VPlan.h:2502
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2528
Instruction & Ingredient
Definition: VPlan.h:2463
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2491
Instruction & getIngredient() const
Definition: VPlan.h:2532
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2466
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2495
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2482
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2522
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2518
void setMask(VPValue *Mask)
Definition: VPlan.h:2474
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2515
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2512
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:1910
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:1940
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:1949
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:1916
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1922
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:1946
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1883
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1898
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:1871
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1410
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1421
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1415
unsigned getOpcode() const
Definition: VPlan.h:1437
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:3514
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:3555
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1287
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:3708
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instruction can be combined.
Definition: VPlan.h:3798
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:3786
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:3795
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3268
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1178
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1154
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:921
bool hasScalableVF()
Definition: VPlan.h:3409
VPBasicBlock * getEntry()
Definition: VPlan.h:3370
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3395
void setName(const Twine &newName)
Definition: VPlan.h:3432
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3398
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3374
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3388
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3415
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:1187
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3371
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader, trip count TC and Entry to the plan.
Definition: VPlan.h:3328
VPBasicBlock * getPreheader()
Definition: VPlan.h:3503
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3470
const VPRegionBlock * getVectorLoopRegion() const
Definition: VPlan.h:3473
bool hasVF(ElementCount VF)
Definition: VPlan.h:3408
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:3497
bool hasUF(unsigned UF) const
Definition: VPlan.h:3421
void setVF(ElementCount VF)
Definition: VPlan.h:3402
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1091
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3381
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which con...
Definition: VPlan.cpp:863
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader and Entry to the plan.
Definition: VPlan.h:3337
const VPBasicBlock * getPreheader() const
Definition: VPlan.h:3504
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3436
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1184
bool hasScalarVFOnly() const
Definition: VPlan.h:3419
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:981
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:3478
const MapVector< PHINode *, VPLiveOut * > & getLiveOuts() const
Definition: VPlan.h:3489
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1128
void addVF(ElementCount VF)
Definition: VPlan.h:3400
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3453
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:3493
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1098
void setUF(unsigned UF)
Definition: VPlan.h:3423
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1230
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
bool isUniformAfterVectorization(const VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3818
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1615
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1610
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1605
bool isHeaderMask(const VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
Definition: VPlan.cpp:1632
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
const SCEV * createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *OrigLoop)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:147
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:572
@ Other
Any other memory.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:95
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:471
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:100
iterator end()
Definition: VPlan.h:141
const ElementCount Start
Definition: VPlan.h:102
ElementCount End
Definition: VPlan.h:105
iterator begin()
Definition: VPlan.h:140
bool isEmpty() const
Definition: VPlan.h:107
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:111
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:737
LLVMContext & LLVMCtx
Definition: VPlan.h:741
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, Type *CanIVTy, LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM)
Definition: VPlan.h:745
LoopVectorizationCostModel & CM
Definition: VPlan.h:742
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:740
const TargetLibraryInfo & TLI
Definition: VPlan.h:739
const TargetTransformInfo & TTI
Definition: VPlan.h:738
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:743
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:1955
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1965
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:1956
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1961
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:238
VPIteration(unsigned Part, const VPLane &Lane)
Definition: VPlan.h:248
unsigned Part
in [0..UF)
Definition: VPlan.h:240
VPLane Lane
Definition: VPlan.h:242
VPIteration(unsigned Part, unsigned Lane, VPLane::Kind Kind=VPLane::Kind::First)
Definition: VPlan.h:244
bool isFirstIteration() const
Definition: VPlan.h:250
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:985
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:378
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:384
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:392
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:380
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:388
CFGState(DominatorTree *DT)
Definition: VPlan.h:397
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:361
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:395
SmallVector< Value *, 2 > PerPartValuesTy
A type for vectorized values in the new loop.
Definition: VPlan.h:273
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:278
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:275
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:255
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:259
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:406
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:429
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:432
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:374
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:322
struct llvm::VPTransformState::CFGState CFG
void reset(VPValue *Def, Value *V, const VPIteration &Instance)
Reset an existing scalar value for Def and a given Instance.
Definition: VPlan.h:344
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:425
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:366
void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:406
void set(VPValue *Def, Value *V, const VPIteration &Instance)
Set the generated scalar V for Def and the given Instance.
Definition: VPlan.h:330
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:307
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:267
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:409
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:295
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:415
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:412
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:289
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:261
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:418
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:385
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:2576
void execute(VPTransformState &State) override
Generate the wide load or gather.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2588
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2600
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:2577
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2537
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2538
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2564
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2546
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1587
bool isInvariantCond() const
Definition: VPlan.h:1615
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1595
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1589
VPValue * getCond() const
Definition: VPlan.h:1611
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:2652
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2663
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2678
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:2653
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2666
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:2611
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2640
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2612
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2628
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2619
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:3564
const Value * V
Definition: VPlan.h:3562
void print(raw_ostream &O) const
Definition: VPlan.cpp:1405