LLVM 19.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/Twine.h"
36#include "llvm/ADT/ilist.h"
37#include "llvm/ADT/ilist_node.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/FMF.h"
43#include "llvm/IR/Operator.h"
44#include <algorithm>
45#include <cassert>
46#include <cstddef>
47#include <string>
48
49namespace llvm {
50
51class BasicBlock;
52class DominatorTree;
53class InnerLoopVectorizer;
54class IRBuilderBase;
55class LoopInfo;
56class raw_ostream;
57class RecurrenceDescriptor;
58class SCEV;
59class Type;
60class VPBasicBlock;
61class VPRegionBlock;
62class VPlan;
63class VPReplicateRecipe;
64class VPlanSlp;
65class Value;
66class LoopVersioning;
67
68namespace Intrinsic {
69typedef unsigned ID;
70}
71
72/// Returns a calculation for the total number of elements for a given \p VF.
73/// For fixed width vectors this value is a constant, whereas for scalable
74/// vectors it is an expression determined at runtime.
75Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
76
77/// Return a value for Step multiplied by VF.
78Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
79 int64_t Step);
80
81const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
82 Loop *CurLoop = nullptr);
83
84/// A range of powers-of-2 vectorization factors with fixed start and
85/// adjustable end. The range includes start and excludes end, e.g.,:
86/// [1, 16) = {1, 2, 4, 8}
87struct VFRange {
88 // A power of 2.
90
91 // A power of 2. If End <= Start range is empty.
93
94 bool isEmpty() const {
96 }
97
99 : Start(Start), End(End) {
101 "Both Start and End should have the same scalable flag");
103 "Expected Start to be a power of 2");
105 "Expected End to be a power of 2");
106 }
107
108 /// Iterator to iterate over vectorization factors in a VFRange.
110 : public iterator_facade_base<iterator, std::forward_iterator_tag,
111 ElementCount> {
112 ElementCount VF;
113
114 public:
115 iterator(ElementCount VF) : VF(VF) {}
116
117 bool operator==(const iterator &Other) const { return VF == Other.VF; }
118
119 ElementCount operator*() const { return VF; }
120
122 VF *= 2;
123 return *this;
124 }
125 };
126
130 return iterator(End);
131 }
132};
133
134using VPlanPtr = std::unique_ptr<VPlan>;
135
136/// In what follows, the term "input IR" refers to code that is fed into the
137/// vectorizer whereas the term "output IR" refers to code that is generated by
138/// the vectorizer.
139
140/// VPLane provides a way to access lanes in both fixed width and scalable
141/// vectors, where for the latter the lane index sometimes needs calculating
142/// as a runtime expression.
143class VPLane {
144public:
145 /// Kind describes how to interpret Lane.
146 enum class Kind : uint8_t {
147 /// For First, Lane is the index into the first N elements of a
148 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
149 First,
150 /// For ScalableLast, Lane is the offset from the start of the last
151 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
152 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
153 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
155 };
156
157private:
158 /// in [0..VF)
159 unsigned Lane;
160
161 /// Indicates how the Lane should be interpreted, as described above.
162 Kind LaneKind;
163
164public:
165 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
166
168
170 unsigned LaneOffset = VF.getKnownMinValue() - 1;
171 Kind LaneKind;
172 if (VF.isScalable())
173 // In this case 'LaneOffset' refers to the offset from the start of the
174 // last subvector with VF.getKnownMinValue() elements.
176 else
177 LaneKind = VPLane::Kind::First;
178 return VPLane(LaneOffset, LaneKind);
179 }
180
181 /// Returns a compile-time known value for the lane index and asserts if the
182 /// lane can only be calculated at runtime.
183 unsigned getKnownLane() const {
184 assert(LaneKind == Kind::First);
185 return Lane;
186 }
187
188 /// Returns an expression describing the lane index that can be used at
189 /// runtime.
190 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
191
192 /// Returns the Kind of lane offset.
193 Kind getKind() const { return LaneKind; }
194
195 /// Returns true if this is the first lane of the whole vector.
196 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
197
198 /// Maps the lane to a cache index based on \p VF.
199 unsigned mapToCacheIndex(const ElementCount &VF) const {
200 switch (LaneKind) {
202 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
203 return VF.getKnownMinValue() + Lane;
204 default:
205 assert(Lane < VF.getKnownMinValue());
206 return Lane;
207 }
208 }
209
210 /// Returns the maxmimum number of lanes that we are able to consider
211 /// caching for \p VF.
212 static unsigned getNumCachedLanes(const ElementCount &VF) {
213 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
214 }
215};
216
217/// VPIteration represents a single point in the iteration space of the output
218/// (vectorized and/or unrolled) IR loop.
220 /// in [0..UF)
221 unsigned Part;
222
224
225 VPIteration(unsigned Part, unsigned Lane,
227 : Part(Part), Lane(Lane, Kind) {}
228
229 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
230
231 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
232};
233
234/// VPTransformState holds information passed down when "executing" a VPlan,
235/// needed for generating the output IR.
240
241 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
243 unsigned UF;
244
245 /// Hold the indices to generate specific scalar instructions. Null indicates
246 /// that all instances are to be generated, using either scalar or vector
247 /// instructions.
248 std::optional<VPIteration> Instance;
249
250 struct DataState {
251 /// A type for vectorized values in the new loop. Each value from the
252 /// original loop, when vectorized, is represented by UF vector values in
253 /// the new unrolled loop, where UF is the unroll factor.
255
257
261
262 /// Get the generated vector Value for a given VPValue \p Def and a given \p
263 /// Part if \p IsScalar is false, otherwise return the generated scalar
264 /// for \p Part. \see set.
265 Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
266
267 /// Get the generated Value for a given VPValue and given Part and Lane.
268 Value *get(VPValue *Def, const VPIteration &Instance);
269
270 bool hasVectorValue(VPValue *Def, unsigned Part) {
271 auto I = Data.PerPartOutput.find(Def);
272 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
273 I->second[Part];
274 }
275
277 auto I = Data.PerPartScalars.find(Def);
278 if (I == Data.PerPartScalars.end())
279 return false;
280 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
281 return Instance.Part < I->second.size() &&
282 CacheIdx < I->second[Instance.Part].size() &&
283 I->second[Instance.Part][CacheIdx];
284 }
285
286 /// Set the generated vector Value for a given VPValue and a given Part, if \p
287 /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
288 void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
289 if (IsScalar) {
290 set(Def, V, VPIteration(Part, 0));
291 return;
292 }
293 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
294 "scalar values must be stored as (Part, 0)");
295 if (!Data.PerPartOutput.count(Def)) {
297 Data.PerPartOutput[Def] = Entry;
298 }
299 Data.PerPartOutput[Def][Part] = V;
300 }
301
302 /// Reset an existing vector value for \p Def and a given \p Part.
303 void reset(VPValue *Def, Value *V, unsigned Part) {
304 auto Iter = Data.PerPartOutput.find(Def);
305 assert(Iter != Data.PerPartOutput.end() &&
306 "need to overwrite existing value");
307 Iter->second[Part] = V;
308 }
309
310 /// Set the generated scalar \p V for \p Def and the given \p Instance.
311 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
312 auto Iter = Data.PerPartScalars.insert({Def, {}});
313 auto &PerPartVec = Iter.first->second;
314 if (PerPartVec.size() <= Instance.Part)
315 PerPartVec.resize(Instance.Part + 1);
316 auto &Scalars = PerPartVec[Instance.Part];
317 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
318 if (Scalars.size() <= CacheIdx)
319 Scalars.resize(CacheIdx + 1);
320 assert(!Scalars[CacheIdx] && "should overwrite existing value");
321 Scalars[CacheIdx] = V;
322 }
323
324 /// Reset an existing scalar value for \p Def and a given \p Instance.
325 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
326 auto Iter = Data.PerPartScalars.find(Def);
327 assert(Iter != Data.PerPartScalars.end() &&
328 "need to overwrite existing value");
329 assert(Instance.Part < Iter->second.size() &&
330 "need to overwrite existing value");
331 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
332 assert(CacheIdx < Iter->second[Instance.Part].size() &&
333 "need to overwrite existing value");
334 Iter->second[Instance.Part][CacheIdx] = V;
335 }
336
337 /// Add additional metadata to \p To that was not present on \p Orig.
338 ///
339 /// Currently this is used to add the noalias annotations based on the
340 /// inserted memchecks. Use this for instructions that are *cloned* into the
341 /// vector loop.
342 void addNewMetadata(Instruction *To, const Instruction *Orig);
343
344 /// Add metadata from one instruction to another.
345 ///
346 /// This includes both the original MDs from \p From and additional ones (\see
347 /// addNewMetadata). Use this for *newly created* instructions in the vector
348 /// loop.
349 void addMetadata(Value *To, Instruction *From);
350
351 /// Set the debug location in the builder using the debug location \p DL.
353
354 /// Construct the vector value of a scalarized value \p V one lane at a time.
356
357 /// Hold state information used when constructing the CFG of the output IR,
358 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
359 struct CFGState {
360 /// The previous VPBasicBlock visited. Initially set to null.
362
363 /// The previous IR BasicBlock created or used. Initially set to the new
364 /// header BasicBlock.
365 BasicBlock *PrevBB = nullptr;
366
367 /// The last IR BasicBlock in the output IR. Set to the exit block of the
368 /// vector loop.
369 BasicBlock *ExitBB = nullptr;
370
371 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
372 /// of replication, maps the BasicBlock of the last replica created.
374
375 CFGState() = default;
376
377 /// Returns the BasicBlock* mapped to the pre-header of the loop region
378 /// containing \p R.
381
382 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
384
385 /// Hold a pointer to Dominator Tree to register new basic blocks in the loop.
387
388 /// Hold a reference to the IRBuilder used to generate output IR code.
390
391 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
393
394 /// Pointer to the VPlan code is generated for.
396
397 /// The loop object for the current parent region, or nullptr.
399
400 /// LoopVersioning. It's only set up (non-null) if memchecks were
401 /// used.
402 ///
403 /// This is currently only used to add no-alias metadata based on the
404 /// memchecks. The actual versioning is performed manually.
406
407 /// Map SCEVs to their expanded values. Populated when executing
408 /// VPExpandSCEVRecipes.
410
411 /// VPlan-based type analysis.
413};
414
415/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
416/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
418 friend class VPBlockUtils;
419
420 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
421
422 /// An optional name for the block.
423 std::string Name;
424
425 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
426 /// it is a topmost VPBlockBase.
427 VPRegionBlock *Parent = nullptr;
428
429 /// List of predecessor blocks.
431
432 /// List of successor blocks.
434
435 /// VPlan containing the block. Can only be set on the entry block of the
436 /// plan.
437 VPlan *Plan = nullptr;
438
439 /// Add \p Successor as the last successor to this block.
440 void appendSuccessor(VPBlockBase *Successor) {
441 assert(Successor && "Cannot add nullptr successor!");
442 Successors.push_back(Successor);
443 }
444
445 /// Add \p Predecessor as the last predecessor to this block.
446 void appendPredecessor(VPBlockBase *Predecessor) {
447 assert(Predecessor && "Cannot add nullptr predecessor!");
448 Predecessors.push_back(Predecessor);
449 }
450
451 /// Remove \p Predecessor from the predecessors of this block.
452 void removePredecessor(VPBlockBase *Predecessor) {
453 auto Pos = find(Predecessors, Predecessor);
454 assert(Pos && "Predecessor does not exist");
455 Predecessors.erase(Pos);
456 }
457
458 /// Remove \p Successor from the successors of this block.
459 void removeSuccessor(VPBlockBase *Successor) {
460 auto Pos = find(Successors, Successor);
461 assert(Pos && "Successor does not exist");
462 Successors.erase(Pos);
463 }
464
465protected:
466 VPBlockBase(const unsigned char SC, const std::string &N)
467 : SubclassID(SC), Name(N) {}
468
469public:
470 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
471 /// that are actually instantiated. Values of this enumeration are kept in the
472 /// SubclassID field of the VPBlockBase objects. They are used for concrete
473 /// type identification.
474 using VPBlockTy = enum { VPBasicBlockSC, VPRegionBlockSC };
475
477
478 virtual ~VPBlockBase() = default;
479
480 const std::string &getName() const { return Name; }
481
482 void setName(const Twine &newName) { Name = newName.str(); }
483
484 /// \return an ID for the concrete type of this object.
485 /// This is used to implement the classof checks. This should not be used
486 /// for any other purpose, as the values may change as LLVM evolves.
487 unsigned getVPBlockID() const { return SubclassID; }
488
489 VPRegionBlock *getParent() { return Parent; }
490 const VPRegionBlock *getParent() const { return Parent; }
491
492 /// \return A pointer to the plan containing the current block.
493 VPlan *getPlan();
494 const VPlan *getPlan() const;
495
496 /// Sets the pointer of the plan containing the block. The block must be the
497 /// entry block into the VPlan.
498 void setPlan(VPlan *ParentPlan);
499
500 void setParent(VPRegionBlock *P) { Parent = P; }
501
502 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
503 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
504 /// VPBlockBase is a VPBasicBlock, it is returned.
505 const VPBasicBlock *getEntryBasicBlock() const;
507
508 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
509 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
510 /// VPBlockBase is a VPBasicBlock, it is returned.
511 const VPBasicBlock *getExitingBasicBlock() const;
513
514 const VPBlocksTy &getSuccessors() const { return Successors; }
515 VPBlocksTy &getSuccessors() { return Successors; }
516
518
519 const VPBlocksTy &getPredecessors() const { return Predecessors; }
520 VPBlocksTy &getPredecessors() { return Predecessors; }
521
522 /// \return the successor of this VPBlockBase if it has a single successor.
523 /// Otherwise return a null pointer.
525 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
526 }
527
528 /// \return the predecessor of this VPBlockBase if it has a single
529 /// predecessor. Otherwise return a null pointer.
531 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
532 }
533
534 size_t getNumSuccessors() const { return Successors.size(); }
535 size_t getNumPredecessors() const { return Predecessors.size(); }
536
537 /// An Enclosing Block of a block B is any block containing B, including B
538 /// itself. \return the closest enclosing block starting from "this", which
539 /// has successors. \return the root enclosing block if all enclosing blocks
540 /// have no successors.
542
543 /// \return the closest enclosing block starting from "this", which has
544 /// predecessors. \return the root enclosing block if all enclosing blocks
545 /// have no predecessors.
547
548 /// \return the successors either attached directly to this VPBlockBase or, if
549 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
550 /// successors of its own, search recursively for the first enclosing
551 /// VPRegionBlock that has successors and return them. If no such
552 /// VPRegionBlock exists, return the (empty) successors of the topmost
553 /// VPBlockBase reached.
556 }
557
558 /// \return the hierarchical successor of this VPBlockBase if it has a single
559 /// hierarchical successor. Otherwise return a null pointer.
562 }
563
564 /// \return the predecessors either attached directly to this VPBlockBase or,
565 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
566 /// predecessors of its own, search recursively for the first enclosing
567 /// VPRegionBlock that has predecessors and return them. If no such
568 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
569 /// VPBlockBase reached.
572 }
573
574 /// \return the hierarchical predecessor of this VPBlockBase if it has a
575 /// single hierarchical predecessor. Otherwise return a null pointer.
578 }
579
580 /// Set a given VPBlockBase \p Successor as the single successor of this
581 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
582 /// This VPBlockBase must have no successors.
584 assert(Successors.empty() && "Setting one successor when others exist.");
585 assert(Successor->getParent() == getParent() &&
586 "connected blocks must have the same parent");
587 appendSuccessor(Successor);
588 }
589
590 /// Make \p IfTrue and \p IfFalse the two successors of this VPBlockBase, in
591 /// that order (\p IfTrue is appended first). This VPBlockBase is not added
592 /// as predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
593 /// successors; that precondition is checked with an assertion.
594 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
595 assert(Successors.empty() && "Setting two successors when others exist.");
 // NOTE(review): unlike setOneSuccessor, no check is made here that the new
 // successors share this block's parent -- confirm whether that is intended.
596 appendSuccessor(IfTrue);
597 appendSuccessor(IfFalse);
598 }
599
600 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
601 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
602 /// as successor of any VPBasicBlock in \p NewPreds.
604 assert(Predecessors.empty() && "Block predecessors already set.");
605 for (auto *Pred : NewPreds)
606 appendPredecessor(Pred);
607 }
608
609 /// Remove all the predecessors of this block.
610 void clearPredecessors() { Predecessors.clear(); }
611
612 /// Remove all the successors of this block.
613 void clearSuccessors() { Successors.clear(); }
614
615 /// The method which generates the output IR that corresponds to this
616 /// VPBlockBase, thereby "executing" the VPlan.
617 virtual void execute(VPTransformState *State) = 0;
618
619 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
620 static void deleteCFG(VPBlockBase *Entry);
621
622 /// Return true if it is legal to hoist instructions into this block.
624 // There are currently no constraints that prevent an instruction from
625 // being hoisted into a VPBlockBase.
626 return true;
627 }
628
629 /// Replace all operands of VPUsers in the block with \p NewValue and also
630 /// replaces all uses of VPValues defined in the block with NewValue.
631 virtual void dropAllReferences(VPValue *NewValue) = 0;
632
633#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
634 void printAsOperand(raw_ostream &OS, bool PrintType) const {
635 OS << getName();
636 }
637
638 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
639 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
640 /// consecutive numbers.
641 ///
642 /// Note that the numbering is applied to the whole VPlan, so printing
643 /// individual blocks is consistent with the whole VPlan printing.
644 virtual void print(raw_ostream &O, const Twine &Indent,
645 VPSlotTracker &SlotTracker) const = 0;
646
647 /// Print plain-text dump of this VPBlockBase to \p O.
648 void print(raw_ostream &O) const {
650 print(O, "", SlotTracker);
651 }
652
653 /// Print the successors of this block to \p O, prefixing all lines with \p
654 /// Indent.
655 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
656
657 /// Dump this VPBlockBase to dbgs().
658 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
659#endif
660
661 /// Clone the current block and its recipes without updating the operands of
662 /// the cloned recipes, including all blocks in the single-entry single-exit
663 /// region for VPRegionBlocks.
664 virtual VPBlockBase *clone() = 0;
665};
666
667/// A value that is used outside the VPlan. The operand of the user needs to be
668/// added to the associated LCSSA phi node.
669class VPLiveOut : public VPUser {
670 PHINode *Phi;
671
672public:
674 : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
675
676 static inline bool classof(const VPUser *U) {
677 return U->getVPUserID() == VPUser::VPUserID::LiveOut;
678 }
679
680 /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
681 /// means we need to add the appropriate incoming value from the middle
682 /// block as exiting edges from the scalar epilogue loop (if present) are
683 /// already in place, and we exit the vector loop exclusively to the middle
684 /// block.
685 void fixPhi(VPlan &Plan, VPTransformState &State);
686
687 /// Returns true if the VPLiveOut uses scalars of operand \p Op.
688 bool usesScalars(const VPValue *Op) const override {
690 "Op must be an operand of the recipe");
691 return true;
692 }
693
694 PHINode *getPhi() const { return Phi; }
695
696#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
697 /// Print the VPLiveOut to \p O.
699#endif
700};
701
702/// VPRecipeBase is a base class modeling a sequence of one or more output IR
703/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
704/// and is responsible for deleting its defined values. Single-value
705 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from
706 /// both VPRecipeBase and VPValue separately.
707class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
708 public VPDef,
709 public VPUser {
710 friend VPBasicBlock;
711 friend class VPBlockUtils;
712
713 /// Each VPRecipe belongs to a single VPBasicBlock.
714 VPBasicBlock *Parent = nullptr;
715
716 /// The debug location for the recipe.
717 DebugLoc DL;
718
719public:
721 DebugLoc DL = {})
723
724 template <typename IterT>
726 DebugLoc DL = {})
728 virtual ~VPRecipeBase() = default;
729
730 /// Clone the current recipe.
731 virtual VPRecipeBase *clone() = 0;
732
733 /// \return the VPBasicBlock which this VPRecipe belongs to.
734 VPBasicBlock *getParent() { return Parent; }
735 const VPBasicBlock *getParent() const { return Parent; }
736
737 /// The method which generates the output IR instructions that correspond to
738 /// this VPRecipe, thereby "executing" the VPlan.
739 virtual void execute(VPTransformState &State) = 0;
740
741 /// Insert an unlinked recipe into a basic block immediately before
742 /// the specified recipe.
743 void insertBefore(VPRecipeBase *InsertPos);
744 /// Insert an unlinked recipe into \p BB immediately before the insertion
745 /// point \p IP;
747
748 /// Insert an unlinked Recipe into a basic block immediately after
749 /// the specified Recipe.
750 void insertAfter(VPRecipeBase *InsertPos);
751
752 /// Unlink this recipe from its current VPBasicBlock and insert it into
753 /// the VPBasicBlock that MovePos lives in, right after MovePos.
754 void moveAfter(VPRecipeBase *MovePos);
755
756 /// Unlink this recipe and insert into BB before I.
757 ///
758 /// \pre I is a valid iterator into BB.
760
761 /// This method unlinks 'this' from the containing basic block, but does not
762 /// delete it.
763 void removeFromParent();
764
765 /// This method unlinks 'this' from the containing basic block and deletes it.
766 ///
767 /// \returns an iterator pointing to the element after the erased one
769
770 /// Method to support type inquiry through isa, cast, and dyn_cast.
771 static inline bool classof(const VPDef *D) {
772 // All VPDefs are also VPRecipeBases.
773 return true;
774 }
775
776 static inline bool classof(const VPUser *U) {
777 return U->getVPUserID() == VPUser::VPUserID::Recipe;
778 }
779
780 /// Returns true if the recipe may have side-effects.
781 bool mayHaveSideEffects() const;
782
783 /// Returns true for PHI-like recipes.
784 bool isPhi() const {
785 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
786 }
787
788 /// Returns true if the recipe may read from memory.
789 bool mayReadFromMemory() const;
790
791 /// Returns true if the recipe may write to memory.
792 bool mayWriteToMemory() const;
793
794 /// Returns true if the recipe may read from or write to memory.
795 bool mayReadOrWriteMemory() const {
797 }
798
799 /// Returns the debug location of the recipe.
800 DebugLoc getDebugLoc() const { return DL; }
801};
802
// Helper macro defining the classof overloads shared by concrete recipes. Each
// overload reports whether the queried VPDef, VPValue, VPUser, VPRecipeBase or
// VPSingleDefRecipe is (or, for a VPValue, is defined by) a recipe whose VPDef
// ID equals the macro argument.
#define VP_CLASSOF_IMPL(VPDefID) \
  static inline bool classof(const VPDef *D) { \
    return D->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPValue *V) { \
    if (auto *DefR = V->getDefiningRecipe()) \
      return DefR->getVPDefID() == VPDefID; \
    return false; \
  } \
  static inline bool classof(const VPUser *U) { \
    if (auto *UserR = dyn_cast<VPRecipeBase>(U)) \
      return UserR->getVPDefID() == VPDefID; \
    return false; \
  } \
  static inline bool classof(const VPRecipeBase *R) { \
    return R->getVPDefID() == VPDefID; \
  } \
  static inline bool classof(const VPSingleDefRecipe *R) { \
    return R->getVPDefID() == VPDefID; \
  }
822
823/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one
824/// or more output IR instructions that define a single result VPValue.
825/// Note that VPRecipeBase must be inherited from before VPValue.
826class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
827public:
828 template <typename IterT>
829 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
830 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
831
832 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
833 DebugLoc DL = {})
834 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
835
836 template <typename IterT>
837 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
838 DebugLoc DL = {})
839 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
840
 /// Method to support type inquiry through isa, cast, and dyn_cast.
 /// Returns true if the VPDef ID of \p R is one of the IDs in the first case
 /// group below, and false if it is one of the IDs in the second group.
841 static inline bool classof(const VPRecipeBase *R) {
842 switch (R->getVPDefID()) {
 // IDs treated as VPSingleDefRecipes.
843 case VPRecipeBase::VPDerivedIVSC:
844 case VPRecipeBase::VPEVLBasedIVPHISC:
845 case VPRecipeBase::VPExpandSCEVSC:
846 case VPRecipeBase::VPInstructionSC:
847 case VPRecipeBase::VPReductionSC:
848 case VPRecipeBase::VPReplicateSC:
849 case VPRecipeBase::VPScalarIVStepsSC:
850 case VPRecipeBase::VPVectorPointerSC:
851 case VPRecipeBase::VPWidenCallSC:
852 case VPRecipeBase::VPWidenCanonicalIVSC:
853 case VPRecipeBase::VPWidenCastSC:
854 case VPRecipeBase::VPWidenGEPSC:
855 case VPRecipeBase::VPWidenSC:
856 case VPRecipeBase::VPWidenSelectSC:
857 case VPRecipeBase::VPBlendSC:
858 case VPRecipeBase::VPPredInstPHISC:
859 case VPRecipeBase::VPCanonicalIVPHISC:
860 case VPRecipeBase::VPActiveLaneMaskPHISC:
861 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
862 case VPRecipeBase::VPWidenPHISC:
863 case VPRecipeBase::VPWidenIntOrFpInductionSC:
864 case VPRecipeBase::VPWidenPointerInductionSC:
865 case VPRecipeBase::VPReductionPHISC:
866 case VPRecipeBase::VPScalarCastSC:
867 return true;
 // IDs that are not treated as VPSingleDefRecipes.
868 case VPRecipeBase::VPInterleaveSC:
869 case VPRecipeBase::VPBranchOnMaskSC:
870 case VPRecipeBase::VPWidenLoadEVLSC:
871 case VPRecipeBase::VPWidenLoadSC:
872 case VPRecipeBase::VPWidenStoreEVLSC:
873 case VPRecipeBase::VPWidenStoreSC:
874 // TODO: Widened stores don't define a value, but widened loads do. Split
875 // the recipes to be able to make widened loads VPSingleDefRecipes.
 // NOTE(review): separate load and store IDs are already listed above, so
 // this TODO may be partly stale -- confirm before acting on it.
876 return false;
877 }
 // Reached only when the ID matched none of the cases listed above.
878 llvm_unreachable("Unhandled VPDefID");
879 }
880
881 static inline bool classof(const VPUser *U) {
882 auto *R = dyn_cast<VPRecipeBase>(U);
883 return R && classof(R);
884 }
885
886 virtual VPSingleDefRecipe *clone() override = 0;
887
888 /// Returns the underlying instruction.
890 return cast<Instruction>(getUnderlyingValue());
891 }
893 return cast<Instruction>(getUnderlyingValue());
894 }
895};
896
897/// Class to record the LLVM IR flags of a recipe along with the recipe itself.
899 enum class OperationType : unsigned char {
900 Cmp,
901 OverflowingBinOp,
902 DisjointOp,
903 PossiblyExactOp,
904 GEPOp,
905 FPMathOp,
906 NonNegOp,
907 Other
908 };
909
910public:
911 struct WrapFlagsTy {
912 char HasNUW : 1;
913 char HasNSW : 1;
914
916 };
917
919 char IsDisjoint : 1;
921 };
922
923protected:
924 struct GEPFlagsTy {
925 char IsInBounds : 1;
927 };
928
929private:
930 struct ExactFlagsTy {
931 char IsExact : 1;
932 };
933 struct NonNegFlagsTy {
934 char NonNeg : 1;
935 };
936 struct FastMathFlagsTy {
937 char AllowReassoc : 1;
938 char NoNaNs : 1;
939 char NoInfs : 1;
940 char NoSignedZeros : 1;
941 char AllowReciprocal : 1;
942 char AllowContract : 1;
943 char ApproxFunc : 1;
944
945 FastMathFlagsTy(const FastMathFlags &FMF);
946 };
947
948 OperationType OpType;
949
950 union {
954 ExactFlagsTy ExactFlags;
956 NonNegFlagsTy NonNegFlags;
957 FastMathFlagsTy FMFs;
958 unsigned AllFlags;
959 };
960
961protected:
963 OpType = Other.OpType;
964 AllFlags = Other.AllFlags;
965 }
966
967public:
968 template <typename IterT>
969 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
970 : VPSingleDefRecipe(SC, Operands, DL) {
971 OpType = OperationType::Other;
972 AllFlags = 0;
973 }
974
975 template <typename IterT>
976 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
978 if (auto *Op = dyn_cast<CmpInst>(&I)) {
979 OpType = OperationType::Cmp;
980 CmpPredicate = Op->getPredicate();
981 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
982 OpType = OperationType::DisjointOp;
983 DisjointFlags.IsDisjoint = Op->isDisjoint();
984 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
985 OpType = OperationType::OverflowingBinOp;
986 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
987 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
988 OpType = OperationType::PossiblyExactOp;
989 ExactFlags.IsExact = Op->isExact();
990 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
991 OpType = OperationType::GEPOp;
992 GEPFlags.IsInBounds = GEP->isInBounds();
993 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
994 OpType = OperationType::NonNegOp;
995 NonNegFlags.NonNeg = PNNI->hasNonNeg();
996 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
997 OpType = OperationType::FPMathOp;
998 FMFs = Op->getFastMathFlags();
999 } else {
1000 OpType = OperationType::Other;
1001 AllFlags = 0;
1002 }
1003 }
1004
1005 template <typename IterT>
1006 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1007 CmpInst::Predicate Pred, DebugLoc DL = {})
1008 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1009 CmpPredicate(Pred) {}
1010
1011 template <typename IterT>
1012 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1014 : VPSingleDefRecipe(SC, Operands, DL),
1015 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1016
1017 template <typename IterT>
1018 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1019 FastMathFlags FMFs, DebugLoc DL = {})
1020 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1021 FMFs(FMFs) {}
1022
1023 template <typename IterT>
1024 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1026 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1028
1029protected:
1030 template <typename IterT>
1031 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1032 GEPFlagsTy GEPFlags, DebugLoc DL = {})
1033 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1034 GEPFlags(GEPFlags) {}
1035
1036public:
1037 static inline bool classof(const VPRecipeBase *R) {
1038 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1039 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1040 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1041 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1042 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1043 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1044 }
1045
1046 static inline bool classof(const VPUser *U) {
1047 auto *R = dyn_cast<VPRecipeBase>(U);
1048 return R && classof(R);
1049 }
1050
1051 /// Drop all poison-generating flags.
1053 // NOTE: This needs to be kept in-sync with
1054 // Instruction::dropPoisonGeneratingFlags.
1055 switch (OpType) {
1056 case OperationType::OverflowingBinOp:
1057 WrapFlags.HasNUW = false;
1058 WrapFlags.HasNSW = false;
1059 break;
1060 case OperationType::DisjointOp:
1061 DisjointFlags.IsDisjoint = false;
1062 break;
1063 case OperationType::PossiblyExactOp:
1064 ExactFlags.IsExact = false;
1065 break;
1066 case OperationType::GEPOp:
1067 GEPFlags.IsInBounds = false;
1068 break;
1069 case OperationType::FPMathOp:
1070 FMFs.NoNaNs = false;
1071 FMFs.NoInfs = false;
1072 break;
1073 case OperationType::NonNegOp:
1074 NonNegFlags.NonNeg = false;
1075 break;
1076 case OperationType::Cmp:
1077 case OperationType::Other:
1078 break;
1079 }
1080 }
1081
1082 /// Set the IR flags for \p I.
1083 void setFlags(Instruction *I) const {
1084 switch (OpType) {
1085 case OperationType::OverflowingBinOp:
1086 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1087 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1088 break;
1089 case OperationType::DisjointOp:
1090 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1091 break;
1092 case OperationType::PossiblyExactOp:
1093 I->setIsExact(ExactFlags.IsExact);
1094 break;
1095 case OperationType::GEPOp:
1096 cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
1097 break;
1098 case OperationType::FPMathOp:
1099 I->setHasAllowReassoc(FMFs.AllowReassoc);
1100 I->setHasNoNaNs(FMFs.NoNaNs);
1101 I->setHasNoInfs(FMFs.NoInfs);
1102 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1103 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1104 I->setHasAllowContract(FMFs.AllowContract);
1105 I->setHasApproxFunc(FMFs.ApproxFunc);
1106 break;
1107 case OperationType::NonNegOp:
1108 I->setNonNeg(NonNegFlags.NonNeg);
1109 break;
1110 case OperationType::Cmp:
1111 case OperationType::Other:
1112 break;
1113 }
1114 }
1115
1117 assert(OpType == OperationType::Cmp &&
1118 "recipe doesn't have a compare predicate");
1119 return CmpPredicate;
1120 }
1121
1122 bool isInBounds() const {
1123 assert(OpType == OperationType::GEPOp &&
1124 "recipe doesn't have inbounds flag");
1125 return GEPFlags.IsInBounds;
1126 }
1127
1128 /// Returns true if the recipe has fast-math flags.
1129 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1130
1132
1133 bool hasNoUnsignedWrap() const {
1134 assert(OpType == OperationType::OverflowingBinOp &&
1135 "recipe doesn't have a NUW flag");
1136 return WrapFlags.HasNUW;
1137 }
1138
1139 bool hasNoSignedWrap() const {
1140 assert(OpType == OperationType::OverflowingBinOp &&
1141 "recipe doesn't have a NSW flag");
1142 return WrapFlags.HasNSW;
1143 }
1144
1145 bool isDisjoint() const {
1146 assert(OpType == OperationType::DisjointOp &&
1147 "recipe cannot have a disjoing flag");
1149 }
1150
1151#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1152 void printFlags(raw_ostream &O) const;
1153#endif
1154};
1155
1156/// This is a concrete Recipe that models a single VPlan-level instruction.
1157/// While as any Recipe it may generate a sequence of IR instructions when
1158/// executed, these instructions would always form a single-def expression as
1159/// the VPInstruction is also a single def-use vertex.
1161 friend class VPlanSlp;
1162
1163public:
1164 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1165 enum {
1167 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1168 // values of a first-order recurrence.
1175 // Increment the canonical IV separately for each unrolled part.
1180 LogicalAnd, // Non-poison propagating logical And.
1181 // Add an offset in bytes (second operand) to a base pointer (first
1182 // operand). Only generates scalar values (either for the first lane only or
1183 // for all lanes, depending on its uses).
1185 };
1186
1187private:
1188 typedef unsigned char OpcodeTy;
1189 OpcodeTy Opcode;
1190
1191 /// An optional name that can be used for the generated IR instruction.
1192 const std::string Name;
1193
1194 /// Returns true if this VPInstruction generates scalar values for all lanes.
1195 /// Most VPInstructions generate a single value per part, either vector or
1196 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1197 /// values per all lanes, stemming from an original ingredient. This method
1198 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1199 /// underlying ingredient.
1200 bool doesGeneratePerAllLanes() const;
1201
1202 /// Returns true if we can generate a scalar for the first lane only if
1203 /// needed.
1204 bool canGenerateScalarForFirstLane() const;
1205
1206 /// Utility methods serving execute(): generates a single instance of the
1207 /// modeled instruction for a given part. \returns the generated value for \p
1208 /// Part. In some cases an existing value is returned rather than a generated
1209 /// one.
1210 Value *generatePerPart(VPTransformState &State, unsigned Part);
1211
1212 /// Utility methods serving execute(): generates a scalar single instance of
1213 /// the modeled instruction for a given lane. \returns the scalar generated
1214 /// value for lane \p Lane.
1215 Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
1216
1217#if !defined(NDEBUG)
1218 /// Return true if the VPInstruction is a floating point math operation, i.e.
1219 /// has fast-math flags.
1220 bool isFPMathOp() const;
1221#endif
1222
1223public:
1225 const Twine &Name = "")
1226 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1227 Opcode(Opcode), Name(Name.str()) {}
1228
1229 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1230 DebugLoc DL = {}, const Twine &Name = "")
1232
1233 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1234 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1235
1236 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1237 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1238 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1239 Opcode(Opcode), Name(Name.str()) {}
1240
1241 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1242 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1243 const Twine &Name = "")
1244 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1245 Opcode(Opcode), Name(Name.str()) {
1246 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1247 }
1248
1249 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1250 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1251
1252 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1253
1254 VPInstruction *clone() override {
1256 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1257 New->transferFlags(*this);
1258 return New;
1259 }
1260
1261 unsigned getOpcode() const { return Opcode; }
1262
1263 /// Generate the instruction.
1264 /// TODO: We currently execute only per-part unless a specific instance is
1265 /// provided.
1266 void execute(VPTransformState &State) override;
1267
1268#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1269 /// Print the VPInstruction to \p O.
1270 void print(raw_ostream &O, const Twine &Indent,
1271 VPSlotTracker &SlotTracker) const override;
1272
1273 /// Print the VPInstruction to dbgs() (for debugging).
1274 LLVM_DUMP_METHOD void dump() const;
1275#endif
1276
1277 /// Return true if this instruction may modify memory.
1278 bool mayWriteToMemory() const {
1279 // TODO: we can use attributes of the called function to rule out memory
1280 // modifications.
1281 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
1282 Opcode == Instruction::Invoke || Opcode == SLPStore;
1283 }
1284
1285 bool hasResult() const {
1286 // CallInst may or may not have a result, depending on the called function.
1287 // Conservatively assume that calls have results for now.
1288 switch (getOpcode()) {
1289 case Instruction::Ret:
1290 case Instruction::Br:
1291 case Instruction::Store:
1292 case Instruction::Switch:
1293 case Instruction::IndirectBr:
1294 case Instruction::Resume:
1295 case Instruction::CatchRet:
1296 case Instruction::Unreachable:
1297 case Instruction::Fence:
1298 case Instruction::AtomicRMW:
1301 return false;
1302 default:
1303 return true;
1304 }
1305 }
1306
1307 /// Returns true if the recipe only uses the first lane of operand \p Op.
1308 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1309
1310 /// Returns true if the recipe only uses the first part of operand \p Op.
1311 bool onlyFirstPartUsed(const VPValue *Op) const override {
1313 "Op must be an operand of the recipe");
1314 if (getOperand(0) != Op)
1315 return false;
1316 switch (getOpcode()) {
1317 default:
1318 return false;
1321 return true;
1322 };
1323 llvm_unreachable("switch should return");
1324 }
1325};
1326
1327 /// VPWidenRecipe is a recipe for producing a vector-typed copy of its
1328/// ingredient. This recipe covers most of the traditional vectorization cases
1329/// where each ingredient transforms into a vectorized version of itself.
1331 unsigned Opcode;
1332
1333public:
1334 template <typename IterT>
1336 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1337 Opcode(I.getOpcode()) {}
1338
1339 ~VPWidenRecipe() override = default;
1340
1341 VPWidenRecipe *clone() override {
1342 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1343 R->transferFlags(*this);
1344 return R;
1345 }
1346
1347 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1348
1349 /// Produce widened copies of all Ingredients.
1350 void execute(VPTransformState &State) override;
1351
1352 unsigned getOpcode() const { return Opcode; }
1353
1354#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1355 /// Print the recipe.
1356 void print(raw_ostream &O, const Twine &Indent,
1357 VPSlotTracker &SlotTracker) const override;
1358#endif
1359};
1360
1361/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1363 /// Cast instruction opcode.
1364 Instruction::CastOps Opcode;
1365
1366 /// Result type for the cast.
1367 Type *ResultTy;
1368
1369public:
1371 CastInst &UI)
1372 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1373 ResultTy(ResultTy) {
1374 assert(UI.getOpcode() == Opcode &&
1375 "opcode of underlying cast doesn't match");
1376 assert(UI.getType() == ResultTy &&
1377 "result type of underlying cast doesn't match");
1378 }
1379
1381 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1382 ResultTy(ResultTy) {}
1383
1384 ~VPWidenCastRecipe() override = default;
1385
1387 if (auto *UV = getUnderlyingValue())
1388 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1389 *cast<CastInst>(UV));
1390
1391 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1392 }
1393
1394 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1395
1396 /// Produce widened copies of the cast.
1397 void execute(VPTransformState &State) override;
1398
1399#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1400 /// Print the recipe.
1401 void print(raw_ostream &O, const Twine &Indent,
1402 VPSlotTracker &SlotTracker) const override;
1403#endif
1404
1405 Instruction::CastOps getOpcode() const { return Opcode; }
1406
1407 /// Returns the result type of the cast.
1408 Type *getResultType() const { return ResultTy; }
1409};
1410
1411/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1413 Instruction::CastOps Opcode;
1414
1415 Type *ResultTy;
1416
1417 Value *generate(VPTransformState &State, unsigned Part);
1418
1419public:
1421 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1422 ResultTy(ResultTy) {}
1423
1424 ~VPScalarCastRecipe() override = default;
1425
1427 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1428 }
1429
1430 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1431
1432 void execute(VPTransformState &State) override;
1433
1434#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1435 void print(raw_ostream &O, const Twine &Indent,
1436 VPSlotTracker &SlotTracker) const override;
1437#endif
1438
1439 /// Returns the result type of the cast.
1440 Type *getResultType() const { return ResultTy; }
1441
1442 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1443 // At the moment, only uniform codegen is implemented.
1445 "Op must be an operand of the recipe");
1446 return true;
1447 }
1448};
1449
1450/// A recipe for widening Call instructions.
1452 /// ID of the vector intrinsic to call when widening the call. If set to
1453 /// Intrinsic::not_intrinsic, a library call will be used instead.
1454 Intrinsic::ID VectorIntrinsicID;
1455 /// If this recipe represents a library call, Variant stores a pointer to
1456 /// the chosen function. There is a 1:1 mapping between a given VF and the
1457 /// chosen vectorized variant, so there will be a different vplan for each
1458 /// VF with a valid variant.
1459 Function *Variant;
1460
1461public:
1462 template <typename IterT>
1464 Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1465 Function *Variant = nullptr)
1466 : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
1467 VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
1468 assert(
1469 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1470 "last operand must be the called function");
1471 }
1472
1473 ~VPWidenCallRecipe() override = default;
1474
1477 VectorIntrinsicID, getDebugLoc(), Variant);
1478 }
1479
1480 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1481
1482 /// Produce a widened version of the call instruction.
1483 void execute(VPTransformState &State) override;
1484
1486 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1487 }
1488
1490 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1491 }
1493 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1494 }
1495
1496#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1497 /// Print the recipe.
1498 void print(raw_ostream &O, const Twine &Indent,
1499 VPSlotTracker &SlotTracker) const override;
1500#endif
1501};
1502
1503/// A recipe for widening select instructions.
1505 template <typename IterT>
1507 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1508 I.getDebugLoc()) {}
1509
1510 ~VPWidenSelectRecipe() override = default;
1511
1513 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1514 operands());
1515 }
1516
1517 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1518
1519 /// Produce a widened version of the select instruction.
1520 void execute(VPTransformState &State) override;
1521
1522#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1523 /// Print the recipe.
1524 void print(raw_ostream &O, const Twine &Indent,
1525 VPSlotTracker &SlotTracker) const override;
1526#endif
1527
1528 VPValue *getCond() const {
1529 return getOperand(0);
1530 }
1531
1532 bool isInvariantCond() const {
1534 }
1535};
1536
1537/// A recipe for handling GEP instructions.
1539 bool isPointerLoopInvariant() const {
1541 }
1542
1543 bool isIndexLoopInvariant(unsigned I) const {
1545 }
1546
1547 bool areAllOperandsInvariant() const {
1548 return all_of(operands(), [](VPValue *Op) {
1549 return Op->isDefinedOutsideVectorRegions();
1550 });
1551 }
1552
1553public:
1554 template <typename IterT>
1556 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1557
1558 ~VPWidenGEPRecipe() override = default;
1559
1561 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1562 operands());
1563 }
1564
1565 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1566
1567 /// Generate the gep nodes.
1568 void execute(VPTransformState &State) override;
1569
1570#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1571 /// Print the recipe.
1572 void print(raw_ostream &O, const Twine &Indent,
1573 VPSlotTracker &SlotTracker) const override;
1574#endif
1575};
1576
1577 /// A recipe to compute the pointers for widened memory accesses of IndexedTy for
1578/// all parts. If IsReverse is true, compute pointers for accessing the input in
1579/// reverse order per part.
1581 Type *IndexedTy;
1582 bool IsReverse;
1583
1584public:
1585 VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1586 bool IsInBounds, DebugLoc DL)
1587 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1588 GEPFlagsTy(IsInBounds), DL),
1589 IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1590
1591 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1592
1593 void execute(VPTransformState &State) override;
1594
1595 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1597 "Op must be an operand of the recipe");
1598 return true;
1599 }
1600
1602 return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1603 isInBounds(), getDebugLoc());
1604 }
1605
1606#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1607 /// Print the recipe.
1608 void print(raw_ostream &O, const Twine &Indent,
1609 VPSlotTracker &SlotTracker) const override;
1610#endif
1611};
1612
1613/// A pure virtual base class for all recipes modeling header phis, including
1614/// phis for first order recurrences, pointer inductions and reductions. The
1615/// start value is the first operand of the recipe and the incoming value from
1616/// the backedge is the second operand.
1617///
1618/// Inductions are modeled using the following sub-classes:
1619/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1620/// starting at a specified value (zero for the main vector loop, the resume
1621/// value for the epilogue vector loop) and stepping by 1. The induction
1622/// controls exiting of the vector loop by comparing against the vector trip
1623/// count. Produces a single scalar PHI for the induction value per
1624/// iteration.
1625/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1626/// floating point inductions with arbitrary start and step values. Produces
1627/// a vector PHI per-part.
1628/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1629/// value of an IV with different start and step values. Produces a single
1630/// scalar value per iteration
1631/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1632/// canonical or derived induction.
1633/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1634/// pointer induction. Produces either a vector PHI per-part or scalar values
1635/// per-lane based on the canonical induction.
1637protected:
1638 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1639 VPValue *Start = nullptr, DebugLoc DL = {})
1640 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
1641 if (Start)
1642 addOperand(Start);
1643 }
1644
1645public:
1646 ~VPHeaderPHIRecipe() override = default;
1647
1648 /// Method to support type inquiry through isa, cast, and dyn_cast.
1649 static inline bool classof(const VPRecipeBase *B) {
1650 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1651 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1652 }
1653 static inline bool classof(const VPValue *V) {
1654 auto *B = V->getDefiningRecipe();
1655 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1656 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1657 }
1658
1659 /// Generate the phi nodes.
1660 void execute(VPTransformState &State) override = 0;
1661
1662#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1663 /// Print the recipe.
1664 void print(raw_ostream &O, const Twine &Indent,
1665 VPSlotTracker &SlotTracker) const override = 0;
1666#endif
1667
1668 /// Returns the start value of the phi, if one is set.
1670 return getNumOperands() == 0 ? nullptr : getOperand(0);
1671 }
1673 return getNumOperands() == 0 ? nullptr : getOperand(0);
1674 }
1675
1676 /// Update the start value of the recipe.
1678
1679 /// Returns the incoming value from the loop backedge.
1681 return getOperand(1);
1682 }
1683
1684 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1685 /// to be a recipe.
1688 }
1689};
1690
1691/// A recipe for handling phi nodes of integer and floating-point inductions,
1692/// producing their vector values.
1694 PHINode *IV;
1695 TruncInst *Trunc;
1696 const InductionDescriptor &IndDesc;
1697
1698public:
1700 const InductionDescriptor &IndDesc)
1701 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
1702 Trunc(nullptr), IndDesc(IndDesc) {
1703 addOperand(Step);
1704 }
1705
1707 const InductionDescriptor &IndDesc,
1708 TruncInst *Trunc)
1709 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
1710 IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
1711 addOperand(Step);
1712 }
1713
1715
1718 getStepValue(), IndDesc, Trunc);
1719 }
1720
1721 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1722
1723 /// Generate the vectorized and scalarized versions of the phi node as
1724 /// needed by their users.
1725 void execute(VPTransformState &State) override;
1726
1727#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1728 /// Print the recipe.
1729 void print(raw_ostream &O, const Twine &Indent,
1730 VPSlotTracker &SlotTracker) const override;
1731#endif
1732
1734 // TODO: All operands of base recipe must exist and be at same index in
1735 // derived recipe.
1737 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1738 }
1739
1741 // TODO: All operands of base recipe must exist and be at same index in
1742 // derived recipe.
1744 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1745 }
1746
1747 /// Returns the step value of the induction.
1749 const VPValue *getStepValue() const { return getOperand(1); }
1750
1751 /// Returns the first defined value as TruncInst, if it is one or nullptr
1752 /// otherwise.
1753 TruncInst *getTruncInst() { return Trunc; }
1754 const TruncInst *getTruncInst() const { return Trunc; }
1755
1756 PHINode *getPHINode() { return IV; }
1757
1758 /// Returns the induction descriptor for the recipe.
1759 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1760
1761 /// Returns true if the induction is canonical, i.e. starting at 0 and
1762 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
1763 /// same type as the canonical induction.
1764 bool isCanonical() const;
1765
1766 /// Returns the scalar type of the induction.
1768 return Trunc ? Trunc->getType() : IV->getType();
1769 }
1770};
1771
1773 const InductionDescriptor &IndDesc;
1774
1775 bool IsScalarAfterVectorization;
1776
1777public:
1778 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1779 /// Start.
1781 const InductionDescriptor &IndDesc,
1782 bool IsScalarAfterVectorization)
1783 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1784 IndDesc(IndDesc),
1785 IsScalarAfterVectorization(IsScalarAfterVectorization) {
1786 addOperand(Start);
1787 addOperand(Step);
1788 }
1789
1791
1794 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
1795 IndDesc, IsScalarAfterVectorization);
1796 }
1797
1798 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1799
1800 /// Generate vector values for the pointer induction.
1801 void execute(VPTransformState &State) override;
1802
1803 /// Returns true if only scalar values will be generated.
1804 bool onlyScalarsGenerated(bool IsScalable);
1805
1806 /// Returns the induction descriptor for the recipe.
1807 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1808
1809#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1810 /// Print the recipe.
1811 void print(raw_ostream &O, const Twine &Indent,
1812 VPSlotTracker &SlotTracker) const override;
1813#endif
1814};
1815
1816/// A recipe for handling phis that are widened in the vector loop.
1817/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1818/// managed in the recipe directly.
1820 /// List of incoming blocks. Only used in the VPlan native path.
1821 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1822
1823public:
1824 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
1825 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1826 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
1827 if (Start)
1828 addOperand(Start);
1829 }
1830
1832 llvm_unreachable("cloning not implemented yet");
1833 }
1834
1835 ~VPWidenPHIRecipe() override = default;
1836
1837 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1838
1839 /// Generate the phi/select nodes.
1840 void execute(VPTransformState &State) override;
1841
1842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1843 /// Print the recipe.
1844 void print(raw_ostream &O, const Twine &Indent,
1845 VPSlotTracker &SlotTracker) const override;
1846#endif
1847
1848 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
1849 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1850 addOperand(IncomingV);
1851 IncomingBlocks.push_back(IncomingBlock);
1852 }
1853
1854 /// Returns the \p I th incoming VPBasicBlock.
1855 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1856
1857 /// Returns the \p I th incoming VPValue.
1858 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
1859};
1860
1861/// A recipe for handling first-order recurrence phis. The start value is the
1862/// first operand of the recipe and the incoming value from the backedge is the
1863/// second operand.
1866 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1867
1868 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1869
1871 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1872 }
1873
1876 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
1877 }
1878
1879 void execute(VPTransformState &State) override;
1880
1881#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1882 /// Print the recipe.
1883 void print(raw_ostream &O, const Twine &Indent,
1884 VPSlotTracker &SlotTracker) const override;
1885#endif
1886};
1887
1888/// A recipe for handling reduction phis. The start value is the first operand
1889/// of the recipe and the incoming value from the backedge is the second
1890/// operand.
1892 /// Descriptor for the reduction.
1893 const RecurrenceDescriptor &RdxDesc;
1894
1895 /// The phi is part of an in-loop reduction.
1896 bool IsInLoop;
1897
1898 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1899 bool IsOrdered;
1900
1901public:
1902 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1903 /// RdxDesc.
1905 VPValue &Start, bool IsInLoop = false,
1906 bool IsOrdered = false)
1907 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1908 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
1909 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
1910 }
1911
1912 ~VPReductionPHIRecipe() override = default;
1913
1915 auto *R =
1916 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
1917 *getOperand(0), IsInLoop, IsOrdered);
1918 R->addOperand(getBackedgeValue());
1919 return R;
1920 }
1921
1922 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1923
1925 return R->getVPDefID() == VPDef::VPReductionPHISC;
1926 }
1927
1928 /// Generate the phi/select nodes.
1929 void execute(VPTransformState &State) override;
1930
1931#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1932 /// Print the recipe.
1933 void print(raw_ostream &O, const Twine &Indent,
1934 VPSlotTracker &SlotTracker) const override;
1935#endif
1936
1938 return RdxDesc;
1939 }
1940
1941 /// Returns true, if the phi is part of an ordered reduction.
1942 bool isOrdered() const { return IsOrdered; }
1943
1944 /// Returns true, if the phi is part of an in-loop reduction.
1945 bool isInLoop() const { return IsInLoop; }
1946};
1947
1948/// A recipe for vectorizing a phi-node as a sequence of mask-based select
1949/// instructions.
1951public:
1952 /// The blend operation is a User of the incoming values and of their
1953 /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
1954 /// incoming value does not have a mask associated.
1956 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
1957 assert((Operands.size() + 1) % 2 == 0 &&
1958 "Expected an odd number of operands");
1959 }
1960
1961 VPBlendRecipe *clone() override {
1963 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
1964 }
1965
1966 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
1967
1968 /// Return the number of incoming values, taking into account that the first
1969 /// incoming value has no mask.
1970 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
1971
1972 /// Return incoming value number \p Idx.
1973 VPValue *getIncomingValue(unsigned Idx) const {
1974 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
1975 }
1976
1977 /// Return mask number \p Idx.
1978 VPValue *getMask(unsigned Idx) const {
1979 assert(Idx > 0 && "First index has no mask associated.");
1980 return getOperand(Idx * 2);
1981 }
1982
1983 /// Generate the phi/select nodes.
1984 void execute(VPTransformState &State) override;
1985
1986#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1987 /// Print the recipe.
1988 void print(raw_ostream &O, const Twine &Indent,
1989 VPSlotTracker &SlotTracker) const override;
1990#endif
1991
1992 /// Returns true if the recipe only uses the first lane of operand \p Op.
1993 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1995 "Op must be an operand of the recipe");
1996 // Recursing through Blend recipes only, must terminate at header phis at
1997 // the latest.
1998 return all_of(users(),
1999 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2000 }
2001};
2002
2003/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2004/// or stores into one wide load/store and shuffles. The first operand of a
2005/// VPInterleave recipe is the address, followed by the stored values, followed
2006/// by an optional mask.
2009
2010 /// Indicates if the interleave group is in a conditional block and requires a
2011 /// mask.
2012 bool HasMask = false;
2013
2014 /// Indicates if gaps between members of the group need to be masked out or if
2015 /// unused gaps can be loaded speculatively.
2016 bool NeedsMaskForGaps = false;
2017
2018public:
2020 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2021 bool NeedsMaskForGaps)
2022 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2023 NeedsMaskForGaps(NeedsMaskForGaps) {
2024 for (unsigned i = 0; i < IG->getFactor(); ++i)
2025 if (Instruction *I = IG->getMember(i)) {
2026 if (I->getType()->isVoidTy())
2027 continue;
2028 new VPValue(I, this);
2029 }
2030
2031 for (auto *SV : StoredValues)
2032 addOperand(SV);
2033 if (Mask) {
2034 HasMask = true;
2035 addOperand(Mask);
2036 }
2037 }
2038 ~VPInterleaveRecipe() override = default;
2039
2041 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2042 NeedsMaskForGaps);
2043 }
2044
2045 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2046
2047 /// Return the address accessed by this recipe.
2048 VPValue *getAddr() const {
2049 return getOperand(0); // Address is the 1st, mandatory operand.
2050 }
2051
2052 /// Return the mask used by this recipe. Note that a full mask is represented
2053 /// by a nullptr.
2054 VPValue *getMask() const {
2055 // Mask is optional and, when present, is always the last operand.
2056 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2057 }
2058
2059 /// Return the VPValues stored by this interleave group. If it is a load
2060 /// interleave group, return an empty ArrayRef.
2062 // The first operand is the address, followed by the stored values, followed
2063 // by an optional mask.
2066 }
2067
2068 /// Generate the wide load or store, and shuffles.
2069 void execute(VPTransformState &State) override;
2070
2071#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2072 /// Print the recipe.
2073 void print(raw_ostream &O, const Twine &Indent,
2074 VPSlotTracker &SlotTracker) const override;
2075#endif
2076
2078
2079 /// Returns the number of stored operands of this interleave group. Returns 0
2080 /// for load interleave groups.
2081 unsigned getNumStoreOperands() const {
2082 return getNumOperands() - (HasMask ? 2 : 1);
2083 }
2084
2085 /// The recipe only uses the first lane of the address.
2086 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2088 "Op must be an operand of the recipe");
2089 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2090 }
2091};
2092
2093/// A recipe to represent inloop reduction operations, performing a reduction on
2094/// a vector operand into a scalar value, and adding the result to a chain.
2095/// The Operands are {ChainOp, VecOp, [Condition]}.
2097 /// The recurrence descriptor for the reduction in question.
2098 const RecurrenceDescriptor &RdxDesc;
2099 bool IsOrdered;
2100
2101public:
2103 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2104 bool IsOrdered)
2105 : VPSingleDefRecipe(VPDef::VPReductionSC,
2106 ArrayRef<VPValue *>({ChainOp, VecOp}), I),
2107 RdxDesc(R), IsOrdered(IsOrdered) {
2108 if (CondOp)
2109 addOperand(CondOp);
2110 }
2111
2112 ~VPReductionRecipe() override = default;
2113
2115 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2116 getVecOp(), getCondOp(), IsOrdered);
2117 }
2118
2119 VP_CLASSOF_IMPL(VPDef::VPReductionSC)
2120
2121 /// Generate the reduction in the loop
2122 void execute(VPTransformState &State) override;
2123
2124#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2125 /// Print the recipe.
2126 void print(raw_ostream &O, const Twine &Indent,
2127 VPSlotTracker &SlotTracker) const override;
2128#endif
2129
2130 /// The VPValue of the scalar Chain being accumulated.
2131 VPValue *getChainOp() const { return getOperand(0); }
2132 /// The VPValue of the vector value to be reduced.
2133 VPValue *getVecOp() const { return getOperand(1); }
2134 /// The VPValue of the condition for the block.
2136 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2137 }
2138};
2139
2140/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2141/// copies of the original scalar type, one per lane, instead of producing a
2142/// single copy of widened type for all lanes. If the instruction is known to be
2143/// uniform only one copy, per lane zero, will be generated.
2145 /// Indicator if only a single replica per lane is needed.
2146 bool IsUniform;
2147
2148 /// Indicator if the replicas are also predicated.
2149 bool IsPredicated;
2150
2151public:
2152 template <typename IterT>
2154 bool IsUniform, VPValue *Mask = nullptr)
2155 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2156 IsUniform(IsUniform), IsPredicated(Mask) {
2157 if (Mask)
2158 addOperand(Mask);
2159 }
2160
2161 ~VPReplicateRecipe() override = default;
2162
2164 auto *Copy =
2165 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2166 isPredicated() ? getMask() : nullptr);
2167 Copy->transferFlags(*this);
2168 return Copy;
2169 }
2170
2171 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2172
2173 /// Generate replicas of the desired Ingredient. Replicas will be generated
2174 /// for all parts and lanes unless a specific part and lane are specified in
2175 /// the \p State.
2176 void execute(VPTransformState &State) override;
2177
2178#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2179 /// Print the recipe.
2180 void print(raw_ostream &O, const Twine &Indent,
2181 VPSlotTracker &SlotTracker) const override;
2182#endif
2183
2184 bool isUniform() const { return IsUniform; }
2185
2186 bool isPredicated() const { return IsPredicated; }
2187
2188 /// Returns true if the recipe only uses the first lane of operand \p Op.
2189 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2191 "Op must be an operand of the recipe");
2192 return isUniform();
2193 }
2194
2195 /// Returns true if the recipe uses scalars of operand \p Op.
2196 bool usesScalars(const VPValue *Op) const override {
2198 "Op must be an operand of the recipe");
2199 return true;
2200 }
2201
2202 /// Returns true if the recipe is used by a widened recipe via an intervening
2203 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2204 /// in a vector.
2205 bool shouldPack() const;
2206
2207 /// Return the mask of a predicated VPReplicateRecipe.
2209 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2210 return getOperand(getNumOperands() - 1);
2211 }
2212
2213 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2214};
2215
2216/// A recipe for generating conditional branches on the bits of a mask.
2218public:
2220 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2221 if (BlockInMask) // nullptr means all-one mask.
2222 addOperand(BlockInMask);
2223 }
2224
2226 return new VPBranchOnMaskRecipe(getOperand(0));
2227 }
2228
2229 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2230
2231 /// Generate the extraction of the appropriate bit from the block mask and the
2232 /// conditional branch.
2233 void execute(VPTransformState &State) override;
2234
2235#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2236 /// Print the recipe.
2237 void print(raw_ostream &O, const Twine &Indent,
2238 VPSlotTracker &SlotTracker) const override {
2239 O << Indent << "BRANCH-ON-MASK ";
2240 if (VPValue *Mask = getMask())
2241 Mask->printAsOperand(O, SlotTracker);
2242 else
2243 O << " All-One";
2244 }
2245#endif
2246
2247 /// Return the mask used by this recipe. Note that a full mask is represented
2248 /// by a nullptr.
2249 VPValue *getMask() const {
2250 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2251 // Mask is optional.
2252 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2253 }
2254
2255 /// Returns true if the recipe uses scalars of operand \p Op.
2256 bool usesScalars(const VPValue *Op) const override {
2258 "Op must be an operand of the recipe");
2259 return true;
2260 }
2261};
2262
2263/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2264/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2265/// order to merge values that are set under such a branch and feed their uses.
2266/// The phi nodes can be scalar or vector depending on the users of the value.
2267/// This recipe works in concert with VPBranchOnMaskRecipe.
2269public:
2270 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2271 /// nodes after merging back from a Branch-on-Mask.
2273 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
2274 ~VPPredInstPHIRecipe() override = default;
2275
2277 return new VPPredInstPHIRecipe(getOperand(0));
2278 }
2279
2280 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2281
2282 /// Generates phi nodes for live-outs as needed to retain SSA form.
2283 void execute(VPTransformState &State) override;
2284
2285#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2286 /// Print the recipe.
2287 void print(raw_ostream &O, const Twine &Indent,
2288 VPSlotTracker &SlotTracker) const override;
2289#endif
2290
2291 /// Returns true if the recipe uses scalars of operand \p Op.
2292 bool usesScalars(const VPValue *Op) const override {
2294 "Op must be an operand of the recipe");
2295 return true;
2296 }
2297};
2298
2299/// A common base class for widening memory operations. An optional mask can be
2300/// provided as the last operand.
2302protected:
2304
2305 /// Whether the accessed addresses are consecutive.
2307
2308 /// Whether the consecutive accessed addresses are in reverse order.
2310
2311 /// Whether the memory access is masked.
2312 bool IsMasked = false;
2313
2314 void setMask(VPValue *Mask) {
2315 assert(!IsMasked && "cannot re-set mask");
2316 if (!Mask)
2317 return;
2318 addOperand(Mask);
2319 IsMasked = true;
2320 }
2321
2322 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2323 std::initializer_list<VPValue *> Operands,
2324 bool Consecutive, bool Reverse, DebugLoc DL)
2326 Reverse(Reverse) {
2327 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2328 }
2329
2330public:
2332 llvm_unreachable("cloning not supported");
2333 }
2334
2335 static inline bool classof(const VPRecipeBase *R) {
2336 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2337 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2338 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2339 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2340 }
2341
2342 static inline bool classof(const VPUser *U) {
2343 auto *R = dyn_cast<VPRecipeBase>(U);
2344 return R && classof(R);
2345 }
2346
2347 /// Return whether the loaded-from / stored-to addresses are consecutive.
2348 bool isConsecutive() const { return Consecutive; }
2349
2350 /// Return whether the consecutive loaded/stored addresses are in reverse
2351 /// order.
2352 bool isReverse() const { return Reverse; }
2353
2354 /// Return the address accessed by this recipe.
2355 VPValue *getAddr() const { return getOperand(0); }
2356
2357 /// Returns true if the recipe is masked.
2358 bool isMasked() const { return IsMasked; }
2359
2360 /// Return the mask used by this recipe. Note that a full mask is represented
2361 /// by a nullptr.
2362 VPValue *getMask() const {
2363 // Mask is optional and therefore the last operand.
2364 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2365 }
2366
2367 /// Generate the wide load/store.
2368 void execute(VPTransformState &State) override {
2369 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2370 }
2371
2373};
2374
2375/// A recipe for widening load operations, using the address to load from and an
2376/// optional mask.
2377struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2379 bool Consecutive, bool Reverse, DebugLoc DL)
2380 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2381 Reverse, DL),
2382 VPValue(this, &Load) {
2383 setMask(Mask);
2384 }
2385
2387 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2389 getDebugLoc());
2390 }
2391
2392 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2393
2394 /// Generate a wide load or gather.
2395 void execute(VPTransformState &State) override;
2396
2397#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2398 /// Print the recipe.
2399 void print(raw_ostream &O, const Twine &Indent,
2400 VPSlotTracker &SlotTracker) const override;
2401#endif
2402
2403 /// Returns true if the recipe only uses the first lane of operand \p Op.
2404 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2406 "Op must be an operand of the recipe");
2407 // Widened, consecutive loads operations only demand the first lane of
2408 // their address.
2409 return Op == getAddr() && isConsecutive();
2410 }
2411};
2412
2413/// A recipe for widening load operations with vector-predication intrinsics,
2414/// using the address to load from, the explicit vector length and an optional
2415/// mask.
2416struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
2418 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
2419 {L->getAddr(), EVL}, L->isConsecutive(),
2420 L->isReverse(), L->getDebugLoc()),
2421 VPValue(this, &getIngredient()) {
2422 setMask(Mask);
2423 }
2424
2425 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
2426
2427 /// Return the EVL operand.
2428 VPValue *getEVL() const { return getOperand(1); }
2429
2430 /// Generate the wide load or gather.
2431 void execute(VPTransformState &State) override;
2432
2433#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2434 /// Print the recipe.
2435 void print(raw_ostream &O, const Twine &Indent,
2436 VPSlotTracker &SlotTracker) const override;
2437#endif
2438
2439 /// Returns true if the recipe only uses the first lane of operand \p Op.
2440 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2442 "Op must be an operand of the recipe");
2443 // Widened loads only demand the first lane of EVL and consecutive loads
2444 // only demand the first lane of their address.
2445 return Op == getEVL() || (Op == getAddr() && isConsecutive());
2446 }
2447};
2448
2449/// A recipe for widening store operations, using the stored value, the address
2450/// to store to and an optional mask.
2453 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2454 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2456 setMask(Mask);
2457 }
2458
2460 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
2462 Reverse, getDebugLoc());
2463 }
2464
2465 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
2466
2467 /// Return the value stored by this recipe.
2468 VPValue *getStoredValue() const { return getOperand(1); }
2469
2470 /// Generate a wide store or scatter.
2471 void execute(VPTransformState &State) override;
2472
2473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2474 /// Print the recipe.
2475 void print(raw_ostream &O, const Twine &Indent,
2476 VPSlotTracker &SlotTracker) const override;
2477#endif
2478
2479 /// Returns true if the recipe only uses the first lane of operand \p Op.
2480 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2482 "Op must be an operand of the recipe");
2483 // Widened, consecutive stores only demand the first lane of their address,
2484 // unless the same operand is also stored.
2485 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2486 }
2487};
2488
2489/// A recipe for widening store operations with vector-predication intrinsics,
2490/// using the value to store, the address to store to, the explicit vector
2491/// length and an optional mask.
2494 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
2495 {S->getAddr(), S->getStoredValue(), EVL},
2496 S->isConsecutive(), S->isReverse(),
2497 S->getDebugLoc()) {
2498 setMask(Mask);
2499 }
2500
2501 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
2502
2503 /// Return the value stored by this recipe.
2504 VPValue *getStoredValue() const { return getOperand(1); }
2505
2506 /// Return the EVL operand.
2507 VPValue *getEVL() const { return getOperand(2); }
2508
2509 /// Generate the wide store or scatter.
2510 void execute(VPTransformState &State) override;
2511
2512#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2513 /// Print the recipe.
2514 void print(raw_ostream &O, const Twine &Indent,
2515 VPSlotTracker &SlotTracker) const override;
2516#endif
2517
2518 /// Returns true if the recipe only uses the first lane of operand \p Op.
2519 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2521 "Op must be an operand of the recipe");
2522 if (Op == getEVL()) {
2523 assert(getStoredValue() != Op && "unexpected store of EVL");
2524 return true;
2525 }
2526 // Widened, consecutive memory operations only demand the first lane of
2527 // their address, unless the same operand is also stored. The latter can
2528 // happen with opaque pointers.
2529 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2530 }
2531};
2532
2533/// Recipe to expand a SCEV expression.
2535 const SCEV *Expr;
2536 ScalarEvolution &SE;
2537
2538public:
2540 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
2541
2542 ~VPExpandSCEVRecipe() override = default;
2543
2545 return new VPExpandSCEVRecipe(Expr, SE);
2546 }
2547
2548 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
2549
2550 /// Expand the SCEV expression of this recipe.
2551 void execute(VPTransformState &State) override;
2552
2553#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2554 /// Print the recipe.
2555 void print(raw_ostream &O, const Twine &Indent,
2556 VPSlotTracker &SlotTracker) const override;
2557#endif
2558
2559 const SCEV *getSCEV() const { return Expr; }
2560};
2561
2562/// Canonical scalar induction phi of the vector loop. Starting at the specified
2563/// start value (either 0 or the resume value when vectorizing the epilogue
2564/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
2565/// canonical induction variable.
2567public:
2569 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
2570
2571 ~VPCanonicalIVPHIRecipe() override = default;
2572
2574 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
2575 R->addOperand(getBackedgeValue());
2576 return R;
2577 }
2578
2579 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
2580
2582 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
2583 }
2584
2585 /// Generate the canonical scalar induction phi of the vector loop.
2586 void execute(VPTransformState &State) override;
2587
2588#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2589 /// Print the recipe.
2590 void print(raw_ostream &O, const Twine &Indent,
2591 VPSlotTracker &SlotTracker) const override;
2592#endif
2593
2594 /// Returns the scalar type of the induction.
2596 return getStartValue()->getLiveInIRValue()->getType();
2597 }
2598
2599 /// Returns true if the recipe only uses the first lane of operand \p Op.
2600 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2602 "Op must be an operand of the recipe");
2603 return true;
2604 }
2605
2606 /// Returns true if the recipe only uses the first part of operand \p Op.
2607 bool onlyFirstPartUsed(const VPValue *Op) const override {
2609 "Op must be an operand of the recipe");
2610 return true;
2611 }
2612
2613 /// Check if the induction described by \p Kind, \p Start and \p Step is
2614 /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
2616 VPValue *Step) const;
2617};
2618
2619/// A recipe for generating the active lane mask for the vector loop that is
2620/// used to predicate the vector operations.
2621/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2622/// remove VPActiveLaneMaskPHIRecipe.
2624public:
2626 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
2627 DL) {}
2628
2629 ~VPActiveLaneMaskPHIRecipe() override = default;
2630
2633 }
2634
2635 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
2636
2638 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
2639 }
2640
2641 /// Generate the active lane mask phi of the vector loop.
2642 void execute(VPTransformState &State) override;
2643
2644#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2645 /// Print the recipe.
2646 void print(raw_ostream &O, const Twine &Indent,
2647 VPSlotTracker &SlotTracker) const override;
2648#endif
2649};
2650
2651/// A recipe for generating the phi node for the current index of elements,
2652/// adjusted in accordance with EVL value. It starts at the start value of the
2653/// canonical induction and gets incremented by EVL in each iteration of the
2654/// vector loop.
2656public:
2658 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
2659
2660 ~VPEVLBasedIVPHIRecipe() override = default;
2661
2663 llvm_unreachable("cloning not implemented yet");
2664 }
2665
2666 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
2667
2669 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
2670 }
2671
2672 /// Generate phi for handling IV based on EVL over iterations correctly.
2673 /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
2674 void execute(VPTransformState &State) override;
2675
2676 /// Returns true if the recipe only uses the first lane of operand \p Op.
2677 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2679 "Op must be an operand of the recipe");
2680 return true;
2681 }
2682
2683#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2684 /// Print the recipe.
2685 void print(raw_ostream &O, const Twine &Indent,
2686 VPSlotTracker &SlotTracker) const override;
2687#endif
2688};
2689
2690/// A Recipe for widening the canonical induction variable of the vector loop.
2692public:
2694 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
2695
2696 ~VPWidenCanonicalIVRecipe() override = default;
2697
2699 return new VPWidenCanonicalIVRecipe(
2700 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
2701 }
2702
2703 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
2704
2705 /// Generate a canonical vector induction variable of the vector loop, with
2706 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
2707 /// step = <VF*UF, VF*UF, ..., VF*UF>.
2708 void execute(VPTransformState &State) override;
2709
2710#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2711 /// Print the recipe.
2712 void print(raw_ostream &O, const Twine &Indent,
2713 VPSlotTracker &SlotTracker) const override;
2714#endif
2715};
2716
2717 /// A recipe for converting the input value \p IV to the corresponding
2718 /// value of an IV with different start and step values, using Start + IV *
2719 /// Step.
2721 /// Kind of the induction.
2723 /// If not nullptr, the floating point induction binary operator. Must be set
2724 /// for floating point inductions.
2725 const FPMathOperator *FPBinOp;
2726
2727public:
2729 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
2731 IndDesc.getKind(),
2732 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
2733 Start, CanonicalIV, Step) {}
2734
2736 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
2737 VPValue *Step)
2738 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
2739 FPBinOp(FPBinOp) {}
2740
2741 ~VPDerivedIVRecipe() override = default;
2742
2744 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
2745 getStepValue());
2746 }
2747
2748 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
2749
2750 /// Generate the transformed value of the induction at offset StartValue (1.
2751 /// operand) + IV (2. operand) * StepValue (3. operand).
2752 void execute(VPTransformState &State) override;
2753
2754#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2755 /// Print the recipe.
2756 void print(raw_ostream &O, const Twine &Indent,
2757 VPSlotTracker &SlotTracker) const override;
2758#endif
2759
2761 return getStartValue()->getLiveInIRValue()->getType();
2762 }
2763
2764 VPValue *getStartValue() const { return getOperand(0); }
2765 VPValue *getStepValue() const { return getOperand(2); }
2766
2767 /// Returns true if the recipe only uses the first lane of operand \p Op.
2768 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2770 "Op must be an operand of the recipe");
2771 return true;
2772 }
2773};
2774
2775/// A recipe for handling phi nodes of integer and floating-point inductions,
2776/// producing their scalar values.
2778 Instruction::BinaryOps InductionOpcode;
2779
2780public:
2783 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
2784 ArrayRef<VPValue *>({IV, Step}), FMFs),
2785 InductionOpcode(Opcode) {}
2786
2788 VPValue *Step)
2790 IV, Step, IndDesc.getInductionOpcode(),
2791 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
2792 ? IndDesc.getInductionBinOp()->getFastMathFlags()
2793 : FastMathFlags()) {}
2794
2795 ~VPScalarIVStepsRecipe() override = default;
2796
2798 return new VPScalarIVStepsRecipe(
2799 getOperand(0), getOperand(1), InductionOpcode,
2801 }
2802
2803 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
2804
2805 /// Generate the scalarized versions of the phi node as needed by their users.
2806 void execute(VPTransformState &State) override;
2807
2808#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2809 /// Print the recipe.
2810 void print(raw_ostream &O, const Twine &Indent,
2811 VPSlotTracker &SlotTracker) const override;
2812#endif
2813
2814 VPValue *getStepValue() const { return getOperand(1); }
2815
2816 /// Returns true if the recipe only uses the first lane of operand \p Op.
2817 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2819 "Op must be an operand of the recipe");
2820 return true;
2821 }
2822};
2823
2824/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
2825/// holds a sequence of zero or more VPRecipe's each representing a sequence of
2826/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
2828public:
2830
2831private:
2832 /// The VPRecipes held in the order of output instructions to generate.
2833 RecipeListTy Recipes;
2834
2835public:
2836 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
2837 : VPBlockBase(VPBasicBlockSC, Name.str()) {
2838 if (Recipe)
2839 appendRecipe(Recipe);
2840 }
2841
2842 ~VPBasicBlock() override {
2843 while (!Recipes.empty())
2844 Recipes.pop_back();
2845 }
2846
2847 /// Instruction iterators...
2852
2853 //===--------------------------------------------------------------------===//
2854 /// Recipe iterator methods
2855 ///
2856 inline iterator begin() { return Recipes.begin(); }
2857 inline const_iterator begin() const { return Recipes.begin(); }
2858 inline iterator end() { return Recipes.end(); }
2859 inline const_iterator end() const { return Recipes.end(); }
2860
2861 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
2862 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
2863 inline reverse_iterator rend() { return Recipes.rend(); }
2864 inline const_reverse_iterator rend() const { return Recipes.rend(); }
2865
2866 inline size_t size() const { return Recipes.size(); }
2867 inline bool empty() const { return Recipes.empty(); }
2868 inline const VPRecipeBase &front() const { return Recipes.front(); }
2869 inline VPRecipeBase &front() { return Recipes.front(); }
2870 inline const VPRecipeBase &back() const { return Recipes.back(); }
2871 inline VPRecipeBase &back() { return Recipes.back(); }
2872
2873 /// Returns a reference to the list of recipes.
2874 RecipeListTy &getRecipeList() { return Recipes; }
2875
2876 /// Returns a pointer to a member of the recipe list.
2878 return &VPBasicBlock::Recipes;
2879 }
2880
2881 /// Method to support type inquiry through isa, cast, and dyn_cast.
2882 static inline bool classof(const VPBlockBase *V) {
2883 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC;
2884 }
2885
2886 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
2887 assert(Recipe && "No recipe to append.");
2888 assert(!Recipe->Parent && "Recipe already in VPlan");
2889 Recipe->Parent = this;
2890 Recipes.insert(InsertPt, Recipe);
2891 }
2892
2893 /// Augment the existing recipes of a VPBasicBlock with an additional
2894 /// \p Recipe as the last recipe.
2895 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
2896
2897 /// The method which generates the output IR instructions that correspond to
2898 /// this VPBasicBlock, thereby "executing" the VPlan.
2899 void execute(VPTransformState *State) override;
2900
2901 /// Return the position of the first non-phi node recipe in the block.
2903
2904 /// Returns an iterator range over the PHI-like recipes in the block.
2906 return make_range(begin(), getFirstNonPhi());
2907 }
2908
2909 void dropAllReferences(VPValue *NewValue) override;
2910
2911 /// Split current block at \p SplitAt by inserting a new block between the
2912 /// current block and its successors and moving all recipes starting at
2913 /// SplitAt to the new block. Returns the new block.
2914 VPBasicBlock *splitAt(iterator SplitAt);
2915
2917
2918#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2919 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
2920 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
2921 ///
2922 /// Note that the numbering is applied to the whole VPlan, so printing
2923 /// individual blocks is consistent with the whole VPlan printing.
2924 void print(raw_ostream &O, const Twine &Indent,
2925 VPSlotTracker &SlotTracker) const override;
2926 using VPBlockBase::print; // Get the print(raw_stream &O) version.
2927#endif
2928
2929 /// If the block has multiple successors, return the branch recipe terminating
2930 /// the block. If there is no successor or only a single one, return nullptr.
2932 const VPRecipeBase *getTerminator() const;
2933
2934 /// Returns true if the block is exiting its parent region.
2935 bool isExiting() const;
2936
2937 /// Clone the current block and its recipes, without updating the operands of
2938 /// the cloned recipes.
2939 VPBasicBlock *clone() override {
2940 auto *NewBlock = new VPBasicBlock(getName());
2941 for (VPRecipeBase &R : *this)
2942 NewBlock->appendRecipe(R.clone());
2943 return NewBlock;
2944 }
2945
2946private:
2947 /// Create an IR BasicBlock to hold the output instructions generated by this
2948 /// VPBasicBlock, and return it. Update the CFGState accordingly.
2949 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
2950};
2951
2952/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
2953/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
2954/// A VPRegionBlock may indicate that its contents are to be replicated several
2955/// times. This is designed to support predicated scalarization, in which a
2956/// scalar if-then code structure needs to be generated VF * UF times. Having
2957/// this replication indicator helps to keep a single model for multiple
2958/// candidate VF's. The actual replication takes place only once the desired VF
2959/// and UF have been determined.
2961 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
2962 VPBlockBase *Entry;
2963
2964 /// Hold the Single Exiting block of the SESE region modelled by the
2965 /// VPRegionBlock.
2966 VPBlockBase *Exiting;
2967
2968 /// An indicator whether this region is to generate multiple replicated
2969 /// instances of output IR corresponding to its VPBlockBases.
2970 bool IsReplicator;
2971
2972public:
2974 const std::string &Name = "", bool IsReplicator = false)
2975 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
2976 IsReplicator(IsReplicator) {
2977 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
2978 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
2979 Entry->setParent(this);
2980 Exiting->setParent(this);
2981 }
2982 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
2983 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
2984 IsReplicator(IsReplicator) {}
2985
2986 ~VPRegionBlock() override { // Tear down the blocks reachable from Entry, if any.
2987 if (Entry) { // Entry is null for regions built with the name-only constructor.
2988 VPValue DummyValue; // Placeholder the dropped references are redirected to.
2989 Entry->dropAllReferences(&DummyValue);
2990 deleteCFG(Entry); // NOTE(review): presumably deletes every block reachable from Entry - confirm.
2991 }
2992 }
2993
2994 /// Method to support type inquiry through isa, cast, and dyn_cast.
2995 static inline bool classof(const VPBlockBase *V) {
2996 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
2997 }
2998
2999 const VPBlockBase *getEntry() const { return Entry; }
3000 VPBlockBase *getEntry() { return Entry; }
3001
3002 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock and make
3003 /// this region its parent. \p EntryBlock must have no predecessors.
3004 void setEntry(VPBlockBase *EntryBlock) {
3005 assert(EntryBlock->getPredecessors().empty() &&
3006 "Entry block cannot have predecessors.");
3007 Entry = EntryBlock;
3008 EntryBlock->setParent(this); // Keep the block's parent link in sync with the region.
3009 }
3010
3011 const VPBlockBase *getExiting() const { return Exiting; }
3012 VPBlockBase *getExiting() { return Exiting; }
3013
3014 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock and
3015 /// make this region its parent. \p ExitingBlock must have no successors.
3016 void setExiting(VPBlockBase *ExitingBlock) {
3017 assert(ExitingBlock->getSuccessors().empty() &&
3018 "Exit block cannot have successors.");
3019 Exiting = ExitingBlock;
3020 ExitingBlock->setParent(this); // Keep the block's parent link in sync with the region.
3021 }
3022
3023 /// Returns the pre-header VPBasicBlock of the loop region.
3025 assert(!isReplicator() && "should only get pre-header of loop regions");
3027 }
3028
3029 /// An indicator whether this region is to generate multiple replicated
3030 /// instances of output IR corresponding to its VPBlockBases.
3031 bool isReplicator() const { return IsReplicator; }
3032
3033 /// The method which generates the output IR instructions that correspond to
3034 /// this VPRegionBlock, thereby "executing" the VPlan.
3035 void execute(VPTransformState *State) override;
3036
3037 void dropAllReferences(VPValue *NewValue) override;
3038
3039#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3040 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3041 /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
3042 /// consecutive numbers.
3043 ///
3044 /// Note that the numbering is applied to the whole VPlan, so printing
3045 /// individual regions is consistent with the whole VPlan printing.
3046 void print(raw_ostream &O, const Twine &Indent,
3047 VPSlotTracker &SlotTracker) const override;
3048 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
3049#endif
3050
3051 /// Clone all blocks in the single-entry single-exit region of the block and
3052 /// their recipes without updating the operands of the cloned recipes.
3053 VPRegionBlock *clone() override;
3054};
3055
3056 /// VPlan models a candidate for vectorization, encoding various decisions taken
3057/// to produce efficient output IR, including which branches, basic-blocks and
3058/// output IR instructions to generate, and their cost. VPlan holds a
3059/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3060/// VPBasicBlock.
3061class VPlan {
3062 friend class VPlanPrinter;
3063 friend class VPSlotTracker;
3064
3065 /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
3066 /// preheader of the vector loop.
3067 VPBasicBlock *Entry;
3068
3069 /// VPBasicBlock corresponding to the original preheader. Used to place
3070 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3071 /// rest of VPlan execution.
3072 VPBasicBlock *Preheader;
3073
3074 /// Holds the VFs applicable to this VPlan.
3076
3077 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3078 /// any UF.
3080
3081 /// Holds the name of the VPlan, for printing.
3082 std::string Name;
3083
3084 /// Represents the trip count of the original loop, for folding
3085 /// the tail.
3086 VPValue *TripCount = nullptr;
3087
3088 /// Represents the backedge taken count of the original loop, for folding
3089 /// the tail. It equals TripCount - 1.
3090 VPValue *BackedgeTakenCount = nullptr;
3091
3092 /// Represents the vector trip count.
3093 VPValue VectorTripCount;
3094
3095 /// Represents the loop-invariant VF * UF of the vector loop region.
3096 VPValue VFxUF;
3097
3098 /// Holds a mapping between Values and their corresponding VPValue inside
3099 /// VPlan.
3100 Value2VPValueTy Value2VPValue;
3101
3102 /// Contains all the external definitions created for this VPlan. External
3103 /// definitions are VPValues that hold a pointer to their underlying IR.
3104 SmallVector<VPValue *, 16> VPLiveInsToFree;
3105
3106 /// Values used outside the plan.
3108
3109 /// Mapping from SCEVs to the VPValues representing their expansions.
3110 /// NOTE: This mapping is temporary and will be removed once all users have
3111 /// been modeled in VPlan directly.
3112 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3113
3114public:
3115 /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
3116 /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
3117 /// be disconnected, as the bypass blocks between them are not yet modeled in
3118 /// VPlan.
3119 VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3120 : VPlan(Preheader, Entry) { // Block setup and checks are delegated to the two-argument constructor.
3121 TripCount = TC;
3122 }
3123
3124 /// Construct a VPlan with original preheader \p Preheader and \p Entry to
3125 /// the plan. At the moment, \p Preheader and \p Entry need to be
3126 /// disconnected, as the bypass blocks between them are not yet modeled in
3127 /// VPlan. The trip count is left unset by this constructor.
3128 VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3129 : Entry(Entry), Preheader(Preheader) {
3130 Entry->setPlan(this); // Associate both blocks with this plan.
3131 Preheader->setPlan(this);
3132 assert(Preheader->getNumSuccessors() == 0 &&
3133 Preheader->getNumPredecessors() == 0 &&
3134 "preheader must be disconnected");
3135 }
3136
3137 ~VPlan();
3138
3139 /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping
3140 /// original scalar pre-header) which contains SCEV expansions that need to
3141 /// happen before the CFG is modified; a VPBasicBlock for the vector
3142 /// pre-header, followed by a region for the vector loop, followed by the
3143 /// middle VPBasicBlock.
3144 static VPlanPtr createInitialVPlan(const SCEV *TripCount,
3145 ScalarEvolution &PSE);
3146
3147 /// Prepare the plan for execution, setting up the required live-in values.
3148 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3149 Value *CanonicalIVStartValue, VPTransformState &State);
3150
3151 /// Generate the IR code for this VPlan.
3152 void execute(VPTransformState *State);
3153
3154 VPBasicBlock *getEntry() { return Entry; }
3155 const VPBasicBlock *getEntry() const { return Entry; }
3156
3157 /// The trip count of the original loop.
3159 assert(TripCount && "trip count needs to be set before accessing it");
3160 return TripCount;
3161 }
3162
3163 /// Resets the trip count for the VPlan to \p NewTripCount. The caller must make
3164 /// sure all uses of the original trip count have been replaced beforehand.
3165 void resetTripCount(VPValue *NewTripCount) {
3166 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3167 "TripCount always must be set"); // Also rejects a null \p NewTripCount and an old trip count that still has users.
3168 TripCount = NewTripCount;
3169 }
3170
3171 /// The backedge taken count of the original loop.
3173 if (!BackedgeTakenCount)
3174 BackedgeTakenCount = new VPValue();
3175 return BackedgeTakenCount;
3176 }
3177
3178 /// The vector trip count.
3179 VPValue &getVectorTripCount() { return VectorTripCount; }
3180
3181 /// Returns VF * UF of the vector loop region.
3182 VPValue &getVFxUF() { return VFxUF; }
3183
3184 void addVF(ElementCount VF) { VFs.insert(VF); }
3185
3187 assert(hasVF(VF) && "Cannot set VF not already in plan");
3188 VFs.clear();
3189 VFs.insert(VF);
3190 }
3191
3192 bool hasVF(ElementCount VF) const { return VFs.count(VF); } // True if \p VF is among the VFs recorded for this plan; const like hasUF().
3194 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3195 }
3196
3197 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3198
3199 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3200
3201 void setUF(unsigned UF) { // Restrict the plan to the single unroll factor \p UF.
3202 assert(hasUF(UF) && "Cannot set the UF not already in plan"); // hasUF() accepts any UF while no UF is recorded.
3203 UFs.clear();
3204 UFs.insert(UF);
3205 }
3206
3207 /// Return a string with the name of the plan and the applicable VFs and UFs.
3208 std::string getName() const;
3209
3210 void setName(const Twine &newName) { Name = newName.str(); }
3211
3212 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3213 /// yet) for \p V.
3215 assert(V && "Trying to get or add the VPValue of a null Value");
3216 if (!Value2VPValue.count(V)) {
3217 VPValue *VPV = new VPValue(V);
3218 VPLiveInsToFree.push_back(VPV);
3219 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3220 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3221 Value2VPValue[V] = VPV;
3222 }
3223
3224 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3225 assert(Value2VPValue[V]->isLiveIn() &&
3226 "Only live-ins should be in mapping");
3227 return Value2VPValue[V];
3228 }
3229
3230 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3231 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3232
3233#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3234 /// Print the live-ins of this VPlan to \p O.
3235 void printLiveIns(raw_ostream &O) const;
3236
3237 /// Print this VPlan to \p O.
3238 void print(raw_ostream &O) const;
3239
3240 /// Print this VPlan in DOT format to \p O.
3241 void printDOT(raw_ostream &O) const;
3242
3243 /// Dump the plan to stderr (for debugging).
3244 LLVM_DUMP_METHOD void dump() const;
3245#endif
3246
3247 /// Returns the VPRegionBlock of the vector loop.
3249 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3250 }
3252 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3253 }
3254
3255 /// Returns the canonical induction recipe of the vector loop.
3258 if (EntryVPBB->empty()) {
3259 // VPlan native path.
3260 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
3261 }
3262 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
3263 }
3264
3265 void addLiveOut(PHINode *PN, VPValue *V);
3266
3268 delete LiveOuts[PN];
3269 LiveOuts.erase(PN);
3270 }
3271
3273 return LiveOuts;
3274 }
3275
3276 VPValue *getSCEVExpansion(const SCEV *S) const { // Return the VPValue recorded as the expansion of \p S.
3277 return SCEVToExpansion.lookup(S); // lookup() yields a default-constructed value (nullptr) when \p S is absent.
3278 }
3279
3280 void addSCEVExpansion(const SCEV *S, VPValue *V) { // Record \p V as the expansion of \p S.
3281 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded"); // Each SCEV may be registered only once.
3282 SCEVToExpansion[S] = V;
3283 }
3284
3285 /// \return The block corresponding to the original preheader.
3286 VPBasicBlock *getPreheader() { return Preheader; }
3287 const VPBasicBlock *getPreheader() const { return Preheader; }
3288
3289 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
3290 /// recipes to refer to the clones, and return it.
3291 VPlan *duplicate();
3292
3293private:
3294 /// Add to the given dominator tree the header block and every new basic block
3295 /// that was created between it and the latch block, inclusive.
3296 static void updateDominatorTree(DominatorTree *DT, BasicBlock *LoopHeaderBB,
3297 BasicBlock *LoopLatchBB,
3298 BasicBlock *LoopExitBB);
3299};
3300
3301#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3302/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
3303/// indented and follows the dot format.
3305 raw_ostream &OS;
3306 const VPlan &Plan;
3307 unsigned Depth = 0;
3308 unsigned TabWidth = 2;
3309 std::string Indent;
3310 unsigned BID = 0;
3312
3314
3315 /// Adjust the nesting depth by \p b and rebuild Indent as Depth * TabWidth spaces.
3316 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
3317
3318 /// Print a given \p Block of the Plan.
3319 void dumpBlock(const VPBlockBase *Block);
3320
3321 /// Print the information related to the CFG edges going out of a given
3322 /// \p Block, followed by printing the successor blocks themselves.
3323 void dumpEdges(const VPBlockBase *Block);
3324
3325 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
3326 /// its successor blocks.
3327 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
3328
3329 /// Print a given \p Region of the Plan.
3330 void dumpRegion(const VPRegionBlock *Region);
3331
3332 unsigned getOrCreateBID(const VPBlockBase *Block) { // Return the ID of \p Block, assigning the next unused one on first request.
3333 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++; // BID only advances when a new ID is handed out.
3334 }
3335
3336 Twine getOrCreateName(const VPBlockBase *Block);
3337
3338 Twine getUID(const VPBlockBase *Block);
3339
3340 /// Print the information related to a CFG edge between two VPBlockBases.
3341 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
3342 const Twine &Label);
3343
3344public:
3346 : OS(O), Plan(P), SlotTracker(&P) {}
3347
3348 LLVM_DUMP_METHOD void dump();
3349};
3350
3352 const Value *V;
3353
3354 VPlanIngredient(const Value *V) : V(V) {}
3355
3356 void print(raw_ostream &O) const;
3357};
3358
3360 I.print(OS);
3361 return OS;
3362}
3363
3365 Plan.print(OS);
3366 return OS;
3367}
3368#endif
3369
3370//===----------------------------------------------------------------------===//
3371// VPlan Utilities
3372//===----------------------------------------------------------------------===//
3373
3374/// Class that provides utilities for VPBlockBases in VPlan.
3376public:
3377 VPBlockUtils() = delete;
3378
3379 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
3380 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
3381 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
3382 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
3383 /// have neither successors nor predecessors.
3384 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
3385 assert(NewBlock->getSuccessors().empty() &&
3386 NewBlock->getPredecessors().empty() &&
3387 "Can't insert new block with predecessors or successors.");
3388 NewBlock->setParent(BlockPtr->getParent());
3389 SmallVector<VPBlockBase *> Succs(BlockPtr->successors()); // Copy first: disconnectBlocks() removes successors from BlockPtr while we iterate.
3390 for (VPBlockBase *Succ : Succs) {
3391 disconnectBlocks(BlockPtr, Succ);
3392 connectBlocks(NewBlock, Succ);
3393 }
3394 connectBlocks(BlockPtr, NewBlock); // BlockPtr now has NewBlock as its only successor.
3395 }
3396
3397 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
3398 /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
3399 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
3400 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
3401 /// and \p IfTrue and \p IfFalse must have neither successors nor
3402 /// predecessors.
3403 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
3404 VPBlockBase *BlockPtr) {
3405 assert(IfTrue->getSuccessors().empty() &&
3406 "Can't insert IfTrue with successors.");
3407 assert(IfFalse->getSuccessors().empty() &&
3408 "Can't insert IfFalse with successors.");
3409 BlockPtr->setTwoSuccessors(IfTrue, IfFalse); // NOTE(review): the remaining documented preconditions are not asserted here - presumably checked by the setters; confirm.
3410 IfTrue->setPredecessors({BlockPtr});
3411 IfFalse->setPredecessors({BlockPtr});
3412 IfTrue->setParent(BlockPtr->getParent());
3413 IfFalse->setParent(BlockPtr->getParent());
3414 }
3415
3416 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
3417 /// the successors of \p From and \p From to the predecessors of \p To. Both
3418 /// VPBlockBases must have the same parent, which can be null. Both
3419 /// VPBlockBases can be already connected to other VPBlockBases.
3421 assert((From->getParent() == To->getParent()) &&
3422 "Can't connect two block with different parents");
3423 assert(From->getNumSuccessors() < 2 &&
3424 "Blocks can't have more than two successors.");
3425 From->appendSuccessor(To);
3426 To->appendPredecessor(From);
3427 }
3428
3429 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
3430 /// from the successors of \p From and \p From from the predecessors of \p To.
3432 assert(To && "Successor to disconnect is null.");
3433 From->removeSuccessor(To);
3434 To->removePredecessor(From);
3435 }
3436
3437 /// Return an iterator range over \p Range which only includes \p BlockTy
3438 /// blocks. The accesses are cast to \p BlockTy.
3439 template <typename BlockTy, typename T>
3440 static auto blocksOnly(const T &Range) {
3441 // Create BaseTy with correct const-ness based on BlockTy.
3442 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
3443 const VPBlockBase, VPBlockBase>;
3444
3445 // We need to first create an iterator range over (const) BlockTy & instead
3446 // of (const) BlockTy * for filter_range to work properly.
3447 auto Mapped =
3448 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
3450 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
3451 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
3452 return cast<BlockTy>(&Block);
3453 });
3454 }
3455};
3456
3459 InterleaveGroupMap;
3460
3461 /// Type for mapping of instruction based interleave groups to VPInstruction
3462 /// interleave groups
3465
3466 /// Recursively traverse \p Region and populate VPlan based interleave groups
3467 /// based on \p IAI.
3468 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
3470 /// Recursively traverse \p Block and populate VPlan based interleave groups
3471 /// based on \p IAI.
3472 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
3474
3475public:
3477
3480 // Avoid releasing a pointer twice.
3481 for (auto &I : InterleaveGroupMap)
3482 DelSet.insert(I.second);
3483 for (auto *Ptr : DelSet)
3484 delete Ptr;
3485 }
3486
3487 /// Get the interleave group that \p Instr belongs to.
3488 ///
3489 /// \returns nullptr if \p Instr does not belong to any interleave group.
3492 return InterleaveGroupMap.lookup(Instr);
3493 }
3494};
3495
3496/// Class that maps (parts of) an existing VPlan to trees of combined
3497/// VPInstructions.
3499 enum class OpMode { Failed, Load, Opcode };
3500
3501 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
3502 /// DenseMap keys.
3503 struct BundleDenseMapInfo {
3504 static SmallVector<VPValue *, 4> getEmptyKey() {
3505 return {reinterpret_cast<VPValue *>(-1)};
3506 }
3507
3508 static SmallVector<VPValue *, 4> getTombstoneKey() {
3509 return {reinterpret_cast<VPValue *>(-2)};
3510 }
3511
3512 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
3513 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
3514 }
3515
3516 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
3518 return LHS == RHS;
3519 }
3520 };
3521
3522 /// Mapping of values in the original VPlan to a combined VPInstruction.
3524 BundleToCombined;
3525
3527
3528 /// Basic block to operate on. For now, only instructions in a single BB are
3529 /// considered.
3530 const VPBasicBlock &BB;
3531
3532 /// Indicates whether we managed to combine all visited instructions or not.
3533 bool CompletelySLP = true;
3534
3535 /// Width of the widest combined bundle in bits.
3536 unsigned WidestBundleBits = 0;
3537
3538 using MultiNodeOpTy =
3539 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
3540
3541 // Input operand bundles for the current multi node. Each multi node operand
3542 // bundle contains values not matching the multi node's opcode. They will
3543 // be reordered in reorderMultiNodeOps, once we completed building a
3544 // multi node.
3545 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
3546
3547 /// Indicates whether we are building a multi node currently.
3548 bool MultiNodeActive = false;
3549
3550 /// Check if we can vectorize Operands together.
3551 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
3552
3553 /// Add combined instruction \p New for the bundle \p Operands.
3554 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
3555
3556 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
3557 VPInstruction *markFailed();
3558
3559 /// Reorder operands in the multi node to maximize sequential memory access
3560 /// and commutative operations.
3561 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
3562
3563 /// Choose the best candidate to use for the lane after \p Last. The set of
3564 /// candidates to choose from are values with an opcode matching \p Last's
3565 /// or loads consecutive to \p Last.
3566 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
3567 SmallPtrSetImpl<VPValue *> &Candidates,
3569
3570#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3571 /// Print bundle \p Values to dbgs().
3572 void dumpBundle(ArrayRef<VPValue *> Values);
3573#endif
3574
3575public:
3576 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
3577
3578 ~VPlanSlp() = default;
3579
3580 /// Tries to build an SLP tree rooted at \p Operands and returns a
3581 /// VPInstruction combining \p Operands, if they can be combined.
3583
3584 /// Return the width of the widest combined bundle in bits.
3585 unsigned getWidestBundleBits() const { return WidestBundleBits; }
3586
3587 /// Return true if all visited instructions can be combined.
3588 bool isCompletelySLP() const { return CompletelySLP; }
3589};
3590
3591namespace vputils {
3592
3593/// Returns true if only the first lane of \p Def is used.
3594bool onlyFirstLaneUsed(const VPValue *Def);
3595
3596/// Returns true if only the first part of \p Def is used.
3597bool onlyFirstPartUsed(const VPValue *Def);
3598
3599/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
3600/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
3601/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
3602/// pre-header already contains a recipe expanding \p Expr, return it. If not,
3603/// create a new one.
3605 ScalarEvolution &SE);
3606
3607/// Returns true if \p VPV is uniform after vectorization.
3609 // A value defined outside the vector region must be uniform after
3610 // vectorization inside a vector region.
3612 return true;
3613 VPRecipeBase *Def = VPV->getDefiningRecipe();
3614 assert(Def && "Must have definition for value defined inside vector region");
3615 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
3616 return Rep->isUniform();
3617 if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
3618 return all_of(GEP->operands(), isUniformAfterVectorization);
3619 if (auto *VPI = dyn_cast<VPInstruction>(Def))
3620 return VPI->getOpcode() == VPInstruction::ComputeReductionResult;
3621 return false;
3622}
3623} // end namespace vputils
3624
3625} // end namespace llvm
3626
3627#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:391
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:537
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This file implements a map that provides insertion order iteration.
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:804
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:601
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:930
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:201
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:444
uint32_t getFactor() const
Definition: VectorUtils.h:460
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:514
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:586
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:693
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:111
ElementCount operator*() const
Definition: VPlan.h:119
iterator & operator++()
Definition: VPlan.h:121
iterator(ElementCount VF)
Definition: VPlan.h:115
bool operator==(const iterator &Other) const
Definition: VPlan.h:117
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:2623
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2631
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2637
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:2625
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2827
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:2849
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2895
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:2939
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:2851
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:2848
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:443
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2874
iterator end()
Definition: VPlan.h:2858
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:2836
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:2856
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:2850
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:2905
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:210
~VPBasicBlock() override
Definition: VPlan.h:2842
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:546
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:511
const_reverse_iterator rbegin() const
Definition: VPlan.h:2862
reverse_iterator rend()
Definition: VPlan.h:2863
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:521
VPRecipeBase & back()
Definition: VPlan.h:2871
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:613
const VPRecipeBase & front() const
Definition: VPlan.h:2868
const_iterator begin() const
Definition: VPlan.h:2857
VPRecipeBase & front()
Definition: VPlan.h:2869
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:596
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:584
const VPRecipeBase & back() const
Definition: VPlan.h:2870
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2886
bool empty() const
Definition: VPlan.h:2867
const_iterator end() const
Definition: VPlan.h:2859
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2882
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:2877
reverse_iterator rbegin()
Definition: VPlan.h:2861
size_t size() const
Definition: VPlan.h:2866
const_reverse_iterator rend() const
Definition: VPlan.h:2864
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:1950
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:1955
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1993
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1973
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1978
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1970
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1961
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:417
VPRegionBlock * getParent()
Definition: VPlan.h:489
VPBlocksTy & getPredecessors()
Definition: VPlan.h:520
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:175
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:658
void setName(const Twine &newName)
Definition: VPlan.h:482
size_t getNumSuccessors() const
Definition: VPlan.h:534
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:517
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:601
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:623
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:648
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:570
size_t getNumPredecessors() const
Definition: VPlan.h:535
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:603
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:197
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:519
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:205
VPlan * getPlan()
Definition: VPlan.cpp:148
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:167
const VPRegionBlock * getParent() const
Definition: VPlan.h:490
void printAsOperand(raw_ostream &OS, bool PrintType) const
Definition: VPlan.h:634
const std::string & getName() const
Definition: VPlan.h:480
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:613
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:560
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:594
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:530
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:554
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:610
enum { VPBasicBlockSC, VPRegionBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:474
unsigned getVPBlockID() const
Definition: VPlan.h:487
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:466
VPBlocksTy & getSuccessors()
Definition: VPlan.h:515
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:189
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:153
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:583
void setParent(VPRegionBlock *P)
Definition: VPlan.h:500
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:576
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:524
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:514
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:3375
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:3440
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:3384
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:3403
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3431
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3420
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2217
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2249
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2237
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2219
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2225
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2256
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:2566
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:2607
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2581
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2573
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:2568
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2600
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2595
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:313
unsigned getVPDefID() const
Definition: VPlanValue.h:433
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:2720
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2735
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2765
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
Definition: VPlan.h:2728
Type * getScalarType() const
Definition: VPlan.h:2760
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2743
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2768
VPValue * getStartValue() const
Definition: VPlan.h:2764
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:2655
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2668
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2662
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:2657
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2677
Recipe to expand a SCEV expression.
Definition: VPlan.h:2534
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:2539
const SCEV * getSCEV() const
Definition: VPlan.h:2559
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2544
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1636
static bool classof(const VPValue *V)
Definition: VPlan.h:1653
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:1638
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1680
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1669
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:1677
VPValue * getStartValue() const
Definition: VPlan.h:1672
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1649
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:1686
~VPHeaderPHIRecipe() override=default
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1160
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1311
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1224
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1254
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1166
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1176
@ CalculateTripCountMinusVF
Definition: VPlan.h:1174
bool hasResult() const
Definition: VPlan.h:1285
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
unsigned getOpcode() const
Definition: VPlan.h:1261
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1236
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1229
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1241
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: VPlan.h:1278
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2007
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2086
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2048
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2019
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2054
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2040
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2061
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2077
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2081
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:3491
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:143
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:169
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:212
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:68
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:165
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:193
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:196
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:183
static VPLane getFirstLane()
Definition: VPlan.h:167
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:146
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:199
A value that is used outside the VPlan.
Definition: VPlan.h:669
VPLiveOut(PHINode *Phi, VPValue *Op)
Definition: VPlan.h:673
static bool classof(const VPUser *U)
Definition: VPlan.h:676
bool usesScalars(const VPValue *Op) const override
Returns true if the VPLiveOut uses scalars of operand Op.
Definition: VPlan.h:688
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:694
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2268
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2292
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2272
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2276
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:709
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:795
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:734
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:800
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:771
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:720
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:735
static bool classof(const VPUser *U)
Definition: VPlan.h:776
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:725
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:784
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flags for a recipe along with it.
Definition: VPlan.h:898
ExactFlagsTy ExactFlags
Definition: VPlan.h:954
FastMathFlagsTy FMFs
Definition: VPlan.h:957
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPFlagsTy GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1031
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:956
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:951
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1006
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1083
bool isInBounds() const
Definition: VPlan.h:1122
GEPFlagsTy GEPFlags
Definition: VPlan.h:955
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1037
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1018
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1052
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1129
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:976
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:953
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1012
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1024
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:962
WrapFlagsTy WrapFlags
Definition: VPlan.h:952
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1133
bool isDisjoint() const
Definition: VPlan.h:1145
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1116
bool hasNoSignedWrap() const
Definition: VPlan.h:1139
static bool classof(const VPUser *U)
Definition: VPlan.h:1046
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:969
A recipe for handling reduction phis.
Definition: VPlan.h:1891
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:1904
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1942
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1914
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:1945
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1924
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:1937
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2096
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2133
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2102
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2135
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2131
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2114
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:2960
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:666
const VPBlockBase * getEntry() const
Definition: VPlan.h:2999
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3031
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:675
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3016
VPBlockBase * getExiting()
Definition: VPlan.h:3012
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3004
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:734
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:2982
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:2973
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:682
const VPBlockBase * getExiting() const
Definition: VPlan.h:3011
VPBlockBase * getEntry()
Definition: VPlan.h:3000
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3024
~VPRegionBlock() override
Definition: VPlan.h:2986
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2995
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2144
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2189
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2196
bool isUniform() const
Definition: VPlan.h:2184
bool isPredicated() const
Definition: VPlan.h:2186
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2163
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2153
unsigned getOpcode() const
Definition: VPlan.h:2213
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2208
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1412
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1426
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1442
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1440
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1420
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:2777
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2817
VPValue * getStepValue() const
Definition: VPlan.h:2814
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2787
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2797
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:2781
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR instructions defining a single result VPValue.
Definition: VPlan.h:826
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:832
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:889
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:841
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:892
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:829
static bool classof(const VPUser *U)
Definition: VPlan.h:881
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:837
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:454
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:36
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:203
operand_range operands()
Definition: VPlanValue.h:278
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:258
unsigned getNumOperands() const
Definition: VPlanValue.h:252
operand_iterator op_begin()
Definition: VPlanValue.h:274
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:253
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:247
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:118
unsigned getNumUsers() const
Definition: VPlanValue.h:112
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:173
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:168
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:133
bool isDefinedOutsideVectorRegions() const
Returns true if the VPValue is defined outside any vector regions, i.e. it is a live-in value.
Definition: VPlanValue.h:187
A recipe to compute the pointers for widened memory accesses of IndexedTy for all parts.
Definition: VPlan.h:1580
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, bool IsInBounds, DebugLoc DL)
Definition: VPlan.h:1585
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1595
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1601
A recipe for widening Call instructions.
Definition: VPlan.h:1451
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1492
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1475
VPWidenCallRecipe(Value *UV, iterator_range< IterT > CallArguments, Intrinsic::ID VectorIntrinsicID, DebugLoc DL={}, Function *Variant=nullptr)
Definition: VPlan.h:1463
Function * getCalledScalarFunction() const
Definition: VPlan.h:1485
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1489
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:2691
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2698
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:2693
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1362
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1370
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1405
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1408
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1380
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1386
A recipe for handling GEP instructions.
Definition: VPlan.h:1538
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1560
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1555
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector values.
Definition: VPlan.h:1693
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc)
Definition: VPlan.h:1706
const TruncInst * getTruncInst() const
Definition: VPlan.h:1754
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:1740
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1716
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1753
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1748
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc)
Definition: VPlan.h:1699
const VPValue * getStepValue() const
Definition: VPlan.h:1749
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1767
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1733
bool isCanonical() const
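Returns true if the induction is canonical, i.e. starting at 0 and incremented by UF * VF (= the original IV is incremented by 1) and has the same type as the canonical induction.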
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1759
A common base class for widening memory operations.
Definition: VPlan.h:2301
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2312
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2309
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2348
static bool classof(const VPUser *U)
Definition: VPlan.h:2342
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2368
Instruction & Ingredient
Definition: VPlan.h:2303
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2331
Instruction & getIngredient() const
Definition: VPlan.h:2372
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2306
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2335
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2322
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2362
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2358
void setMask(VPValue *Mask)
Definition: VPlan.h:2314
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2355
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2352
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:1819
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:1849
VPValue * getIncomingValue(unsigned I)
Returns the I-th incoming VPValue.
Definition: VPlan.h:1858
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:1825
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1831
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I-th incoming VPBasicBlock.
Definition: VPlan.h:1855
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1792
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1807
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:1780
VPWidenRecipe is a recipe for producing a vector-typed copy of its ingredient.
Definition: VPlan.h:1330
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1341
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1335
unsigned getOpcode() const
Definition: VPlan.h:1352
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:3304
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:3345
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1131
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:3498
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:3588
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:3576
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:3585
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient output IR, including which branches, basic-blocks and output IR instructions to generate, and their cost.
Definition: VPlan.h:3061
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:984
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:960
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:783
bool hasScalableVF()
Definition: VPlan.h:3193
VPBasicBlock * getEntry()
Definition: VPlan.h:3154
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3179
void setName(const Twine &newName)
Definition: VPlan.h:3210
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3182
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3158
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3172
void removeLiveOut(PHINode *PN)
Definition: VPlan.h:3267
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:993
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3155
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader, trip count TC and Entry to the plan.
Definition: VPlan.h:3119
VPBasicBlock * getPreheader()
Definition: VPlan.h:3286
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3248
const VPRegionBlock * getVectorLoopRegion() const
Definition: VPlan.h:3251
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE)
Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping original scalar pre-header) w...
Definition: VPlan.cpp:769
bool hasVF(ElementCount VF)
Definition: VPlan.h:3192
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:3280
bool hasUF(unsigned UF) const
Definition: VPlan.h:3199
void setVF(ElementCount VF)
Definition: VPlan.h:3186
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3165
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader and Entry to the plan.
Definition: VPlan.h:3128
const VPBasicBlock * getPreheader() const
Definition: VPlan.h:3287
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3214
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:990
bool hasScalarVFOnly() const
Definition: VPlan.h:3197
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:825
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:3256
const MapVector< PHINode *, VPLiveOut * > & getLiveOuts() const
Definition: VPlan.h:3272
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:934
void addVF(ElementCount VF)
Definition: VPlan.h:3184
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3231
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:3276
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:904
void setUF(unsigned UF)
Definition: VPlan.h:3201
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1074
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:284
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1459
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3608
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1454
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1449
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
const SCEV * createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *OrigLoop)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:134
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator...
Definition: STLExtras.h:572
@ Other
Any other memory.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:491
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:87
iterator end()
Definition: VPlan.h:128
const ElementCount Start
Definition: VPlan.h:89
ElementCount End
Definition: VPlan.h:92
iterator begin()
Definition: VPlan.h:127
bool isEmpty() const
Definition: VPlan.h:94
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:98
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:1864
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1874
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:1865
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1870
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:219
VPIteration(unsigned Part, const VPLane &Lane)
Definition: VPlan.h:229
unsigned Part
in [0..UF)
Definition: VPlan.h:221
VPLane Lane
Definition: VPlan.h:223
VPIteration(unsigned Part, unsigned Lane, VPLane::Kind Kind=VPLane::Kind::First)
Definition: VPlan.h:225
bool isFirstIteration() const
Definition: VPlan.h:231
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:915
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:359
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:365
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:373
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:361
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:369
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
SmallVector< Value *, 2 > PerPartValuesTy
A type for vectorized values in the new loop.
Definition: VPlan.h:254
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:259
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:256
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:247
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:383
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:409
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:412
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:303
struct llvm::VPTransformState::CFGState CFG
void reset(VPValue *Def, Value *V, const VPIteration &Instance)
Reset an existing scalar value for Def and a given Instance.
Definition: VPlan.h:325
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:405
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:353
void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance)
Construct the vector value of a scalarized value Def one lane at a time.
Definition: VPlan.cpp:393
void set(VPValue *Def, Value *V, const VPIteration &Instance)
Set the generated scalar V for Def and the given Instance.
Definition: VPlan.h:311
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:288
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:248
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:389
DominatorTree * DT
Hold a pointer to Dominator Tree to register new basic blocks in the loop.
Definition: VPlan.h:386
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:276
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:395
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:392
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:270
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:242
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:398
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:2416
void execute(VPTransformState &State) override
Generate the wide load or gather.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2428
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2417
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2440
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2377
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2378
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2404
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2386
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1504
bool isInvariantCond() const
Definition: VPlan.h:1532
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1512
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1506
VPValue * getCond() const
Definition: VPlan.h:1528
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:2492
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2504
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2493
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2519
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2507
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:2451
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2480
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2452
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2468
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2459
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:3354
const Value * V
Definition: VPlan.h:3352
void print(raw_ostream &O) const
Definition: VPlan.cpp:1249