LLVM 19.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/MapVector.h"
35#include "llvm/ADT/Twine.h"
36#include "llvm/ADT/ilist.h"
37#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
45#include <algorithm>
46#include <cassert>
47#include <cstddef>
48#include <string>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
54class InnerLoopVectorizer;
55class IRBuilderBase;
56class LoopInfo;
57class raw_ostream;
58class RecurrenceDescriptor;
59class SCEV;
60class Type;
61class VPBasicBlock;
62class VPRegionBlock;
63class VPlan;
64class VPReplicateRecipe;
65class VPlanSlp;
66class Value;
67class LoopVersioning;
68
69namespace Intrinsic {
70typedef unsigned ID;
71}
72
73/// Returns a calculation for the total number of elements for a given \p VF.
74/// For fixed width vectors this value is a constant, whereas for scalable
75/// vectors it is an expression determined at runtime.
76Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
77
78/// Return a value for Step multiplied by VF.
79Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
80 int64_t Step);
81
82const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
83 Loop *CurLoop = nullptr);
84
85/// A range of powers-of-2 vectorization factors with fixed start and
86/// adjustable end. The range includes start and excludes end, e.g.,:
87/// [1, 16) = {1, 2, 4, 8}
88struct VFRange {
89 // A power of 2.
91
92 // A power of 2. If End <= Start range is empty.
94
95 bool isEmpty() const {
97 }
98
100 : Start(Start), End(End) {
102 "Both Start and End should have the same scalable flag");
104 "Expected Start to be a power of 2");
106 "Expected End to be a power of 2");
107 }
108
109 /// Iterator to iterate over vectorization factors in a VFRange.
111 : public iterator_facade_base<iterator, std::forward_iterator_tag,
112 ElementCount> {
113 ElementCount VF;
114
115 public:
116 iterator(ElementCount VF) : VF(VF) {}
117
118 bool operator==(const iterator &Other) const { return VF == Other.VF; }
119
120 ElementCount operator*() const { return VF; }
121
123 VF *= 2;
124 return *this;
125 }
126 };
127
131 return iterator(End);
132 }
133};
134
135using VPlanPtr = std::unique_ptr<VPlan>;
136
137/// In what follows, the term "input IR" refers to code that is fed into the
138/// vectorizer whereas the term "output IR" refers to code that is generated by
139/// the vectorizer.
140
141/// VPLane provides a way to access lanes in both fixed width and scalable
142/// vectors, where for the latter the lane index sometimes needs calculating
143/// as a runtime expression.
144class VPLane {
145public:
146 /// Kind describes how to interpret Lane.
147 enum class Kind : uint8_t {
148 /// For First, Lane is the index into the first N elements of a
149 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
150 First,
151 /// For ScalableLast, Lane is the offset from the start of the last
152 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
153 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
154 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
156 };
157
158private:
159 /// in [0..VF)
160 unsigned Lane;
161
162 /// Indicates how the Lane should be interpreted, as described above.
163 Kind LaneKind;
164
165public:
166 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
167
169
170 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
171 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
172 "trying to extract with invalid offset");
173 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
174 Kind LaneKind;
175 if (VF.isScalable())
176 // In this case 'LaneOffset' refers to the offset from the start of the
177 // last subvector with VF.getKnownMinValue() elements.
179 else
180 LaneKind = VPLane::Kind::First;
181 return VPLane(LaneOffset, LaneKind);
182 }
183
185 return getLaneFromEnd(VF, 1);
186 }
187
188 /// Returns a compile-time known value for the lane index and asserts if the
189 /// lane can only be calculated at runtime.
190 unsigned getKnownLane() const {
191 assert(LaneKind == Kind::First);
192 return Lane;
193 }
194
195 /// Returns an expression describing the lane index that can be used at
196 /// runtime.
197 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
198
199 /// Returns the Kind of lane offset.
200 Kind getKind() const { return LaneKind; }
201
202 /// Returns true if this is the first lane of the whole vector.
203 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
204
205 /// Maps the lane to a cache index based on \p VF.
206 unsigned mapToCacheIndex(const ElementCount &VF) const {
207 switch (LaneKind) {
209 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
210 return VF.getKnownMinValue() + Lane;
211 default:
212 assert(Lane < VF.getKnownMinValue());
213 return Lane;
214 }
215 }
216
217 /// Returns the maxmimum number of lanes that we are able to consider
218 /// caching for \p VF.
219 static unsigned getNumCachedLanes(const ElementCount &VF) {
220 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
221 }
222};
223
224/// VPIteration represents a single point in the iteration space of the output
225/// (vectorized and/or unrolled) IR loop.
227 /// in [0..UF)
228 unsigned Part;
229
231
232 VPIteration(unsigned Part, unsigned Lane,
234 : Part(Part), Lane(Lane, Kind) {}
235
236 VPIteration(unsigned Part, const VPLane &Lane) : Part(Part), Lane(Lane) {}
237
238 bool isFirstIteration() const { return Part == 0 && Lane.isFirstLane(); }
239};
240
241/// VPTransformState holds information passed down when "executing" a VPlan,
242/// needed for generating the output IR.
247
248 /// The chosen Vectorization and Unroll Factors of the loop being vectorized.
250 unsigned UF;
251
252 /// Hold the indices to generate specific scalar instructions. Null indicates
253 /// that all instances are to be generated, using either scalar or vector
254 /// instructions.
255 std::optional<VPIteration> Instance;
256
257 struct DataState {
258 /// A type for vectorized values in the new loop. Each value from the
259 /// original loop, when vectorized, is represented by UF vector values in
260 /// the new unrolled loop, where UF is the unroll factor.
262
264
268
269 /// Get the generated vector Value for a given VPValue \p Def and a given \p
270 /// Part if \p IsScalar is false, otherwise return the generated scalar
271 /// for \p Part. \see set.
272 Value *get(VPValue *Def, unsigned Part, bool IsScalar = false);
273
274 /// Get the generated Value for a given VPValue and given Part and Lane.
275 Value *get(VPValue *Def, const VPIteration &Instance);
276
277 bool hasVectorValue(VPValue *Def, unsigned Part) {
278 auto I = Data.PerPartOutput.find(Def);
279 return I != Data.PerPartOutput.end() && Part < I->second.size() &&
280 I->second[Part];
281 }
282
284 auto I = Data.PerPartScalars.find(Def);
285 if (I == Data.PerPartScalars.end())
286 return false;
287 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
288 return Instance.Part < I->second.size() &&
289 CacheIdx < I->second[Instance.Part].size() &&
290 I->second[Instance.Part][CacheIdx];
291 }
292
293 /// Set the generated vector Value for a given VPValue and a given Part, if \p
294 /// IsScalar is false. If \p IsScalar is true, set the scalar in (Part, 0).
295 void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar = false) {
296 if (IsScalar) {
297 set(Def, V, VPIteration(Part, 0));
298 return;
299 }
300 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
301 "scalar values must be stored as (Part, 0)");
302 if (!Data.PerPartOutput.count(Def)) {
304 Data.PerPartOutput[Def] = Entry;
305 }
306 Data.PerPartOutput[Def][Part] = V;
307 }
308
309 /// Reset an existing vector value for \p Def and a given \p Part.
310 void reset(VPValue *Def, Value *V, unsigned Part) {
311 auto Iter = Data.PerPartOutput.find(Def);
312 assert(Iter != Data.PerPartOutput.end() &&
313 "need to overwrite existing value");
314 Iter->second[Part] = V;
315 }
316
317 /// Set the generated scalar \p V for \p Def and the given \p Instance.
318 void set(VPValue *Def, Value *V, const VPIteration &Instance) {
319 auto Iter = Data.PerPartScalars.insert({Def, {}});
320 auto &PerPartVec = Iter.first->second;
321 if (PerPartVec.size() <= Instance.Part)
322 PerPartVec.resize(Instance.Part + 1);
323 auto &Scalars = PerPartVec[Instance.Part];
324 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
325 if (Scalars.size() <= CacheIdx)
326 Scalars.resize(CacheIdx + 1);
327 assert(!Scalars[CacheIdx] && "should overwrite existing value");
328 Scalars[CacheIdx] = V;
329 }
330
331 /// Reset an existing scalar value for \p Def and a given \p Instance.
332 void reset(VPValue *Def, Value *V, const VPIteration &Instance) {
333 auto Iter = Data.PerPartScalars.find(Def);
334 assert(Iter != Data.PerPartScalars.end() &&
335 "need to overwrite existing value");
336 assert(Instance.Part < Iter->second.size() &&
337 "need to overwrite existing value");
338 unsigned CacheIdx = Instance.Lane.mapToCacheIndex(VF);
339 assert(CacheIdx < Iter->second[Instance.Part].size() &&
340 "need to overwrite existing value");
341 Iter->second[Instance.Part][CacheIdx] = V;
342 }
343
344 /// Add additional metadata to \p To that was not present on \p Orig.
345 ///
346 /// Currently this is used to add the noalias annotations based on the
347 /// inserted memchecks. Use this for instructions that are *cloned* into the
348 /// vector loop.
349 void addNewMetadata(Instruction *To, const Instruction *Orig);
350
351 /// Add metadata from one instruction to another.
352 ///
353 /// This includes both the original MDs from \p From and additional ones (\see
354 /// addNewMetadata). Use this for *newly created* instructions in the vector
355 /// loop.
356 void addMetadata(Value *To, Instruction *From);
357
358 /// Set the debug location in the builder using the debug location \p DL.
360
361 /// Construct the vector value of a scalarized value \p V one lane at a time.
363
364 /// Hold state information used when constructing the CFG of the output IR,
365 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
366 struct CFGState {
367 /// The previous VPBasicBlock visited. Initially set to null.
369
370 /// The previous IR BasicBlock created or used. Initially set to the new
371 /// header BasicBlock.
372 BasicBlock *PrevBB = nullptr;
373
374 /// The last IR BasicBlock in the output IR. Set to the exit block of the
375 /// vector loop.
376 BasicBlock *ExitBB = nullptr;
377
378 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
379 /// of replication, maps the BasicBlock of the last replica created.
381
382 /// Updater for the DominatorTree.
384
386 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
387
388 /// Returns the BasicBlock* mapped to the pre-header of the loop region
389 /// containing \p R.
392
393 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
395
396 /// Hold a reference to the IRBuilder used to generate output IR code.
398
399 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
401
402 /// Pointer to the VPlan code is generated for.
404
405 /// The loop object for the current parent region, or nullptr.
407
408 /// LoopVersioning. It's only set up (non-null) if memchecks were
409 /// used.
410 ///
411 /// This is currently only used to add no-alias metadata based on the
412 /// memchecks. The actual versioning is performed manually.
414
415 /// Map SCEVs to their expanded values. Populated when executing
416 /// VPExpandSCEVRecipes.
418
419 /// VPlan-based type analysis.
421};
422
423/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
424/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
426 friend class VPBlockUtils;
427
428 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
429
430 /// An optional name for the block.
431 std::string Name;
432
433 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
434 /// it is a topmost VPBlockBase.
435 VPRegionBlock *Parent = nullptr;
436
437 /// List of predecessor blocks.
439
440 /// List of successor blocks.
442
443 /// VPlan containing the block. Can only be set on the entry block of the
444 /// plan.
445 VPlan *Plan = nullptr;
446
447 /// Add \p Successor as the last successor to this block.
448 void appendSuccessor(VPBlockBase *Successor) {
449 assert(Successor && "Cannot add nullptr successor!");
450 Successors.push_back(Successor);
451 }
452
453 /// Add \p Predecessor as the last predecessor to this block.
454 void appendPredecessor(VPBlockBase *Predecessor) {
455 assert(Predecessor && "Cannot add nullptr predecessor!");
456 Predecessors.push_back(Predecessor);
457 }
458
459 /// Remove \p Predecessor from the predecessors of this block.
460 void removePredecessor(VPBlockBase *Predecessor) {
461 auto Pos = find(Predecessors, Predecessor);
462 assert(Pos && "Predecessor does not exist");
463 Predecessors.erase(Pos);
464 }
465
466 /// Remove \p Successor from the successors of this block.
467 void removeSuccessor(VPBlockBase *Successor) {
468 auto Pos = find(Successors, Successor);
469 assert(Pos && "Successor does not exist");
470 Successors.erase(Pos);
471 }
472
473protected:
474 VPBlockBase(const unsigned char SC, const std::string &N)
475 : SubclassID(SC), Name(N) {}
476
477public:
478 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
479 /// that are actually instantiated. Values of this enumeration are kept in the
480 /// SubclassID field of the VPBlockBase objects. They are used for concrete
481 /// type identification.
482 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
483
485
486 virtual ~VPBlockBase() = default;
487
488 const std::string &getName() const { return Name; }
489
490 void setName(const Twine &newName) { Name = newName.str(); }
491
492 /// \return an ID for the concrete type of this object.
493 /// This is used to implement the classof checks. This should not be used
494 /// for any other purpose, as the values may change as LLVM evolves.
495 unsigned getVPBlockID() const { return SubclassID; }
496
497 VPRegionBlock *getParent() { return Parent; }
498 const VPRegionBlock *getParent() const { return Parent; }
499
500 /// \return A pointer to the plan containing the current block.
501 VPlan *getPlan();
502 const VPlan *getPlan() const;
503
504 /// Sets the pointer of the plan containing the block. The block must be the
505 /// entry block into the VPlan.
506 void setPlan(VPlan *ParentPlan);
507
508 void setParent(VPRegionBlock *P) { Parent = P; }
509
510 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
511 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
512 /// VPBlockBase is a VPBasicBlock, it is returned.
513 const VPBasicBlock *getEntryBasicBlock() const;
515
516 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
517 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
518 /// VPBlockBase is a VPBasicBlock, it is returned.
519 const VPBasicBlock *getExitingBasicBlock() const;
521
522 const VPBlocksTy &getSuccessors() const { return Successors; }
523 VPBlocksTy &getSuccessors() { return Successors; }
524
526
527 const VPBlocksTy &getPredecessors() const { return Predecessors; }
528 VPBlocksTy &getPredecessors() { return Predecessors; }
529
530 /// \return the successor of this VPBlockBase if it has a single successor.
531 /// Otherwise return a null pointer.
533 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
534 }
535
536 /// \return the predecessor of this VPBlockBase if it has a single
537 /// predecessor. Otherwise return a null pointer.
539 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
540 }
541
542 size_t getNumSuccessors() const { return Successors.size(); }
543 size_t getNumPredecessors() const { return Predecessors.size(); }
544
545 /// An Enclosing Block of a block B is any block containing B, including B
546 /// itself. \return the closest enclosing block starting from "this", which
547 /// has successors. \return the root enclosing block if all enclosing blocks
548 /// have no successors.
550
551 /// \return the closest enclosing block starting from "this", which has
552 /// predecessors. \return the root enclosing block if all enclosing blocks
553 /// have no predecessors.
555
556 /// \return the successors either attached directly to this VPBlockBase or, if
557 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
558 /// successors of its own, search recursively for the first enclosing
559 /// VPRegionBlock that has successors and return them. If no such
560 /// VPRegionBlock exists, return the (empty) successors of the topmost
561 /// VPBlockBase reached.
564 }
565
566 /// \return the hierarchical successor of this VPBlockBase if it has a single
567 /// hierarchical successor. Otherwise return a null pointer.
570 }
571
572 /// \return the predecessors either attached directly to this VPBlockBase or,
573 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
574 /// predecessors of its own, search recursively for the first enclosing
575 /// VPRegionBlock that has predecessors and return them. If no such
576 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
577 /// VPBlockBase reached.
580 }
581
582 /// \return the hierarchical predecessor of this VPBlockBase if it has a
583 /// single hierarchical predecessor. Otherwise return a null pointer.
586 }
587
588 /// Set a given VPBlockBase \p Successor as the single successor of this
589 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
590 /// This VPBlockBase must have no successors.
592 assert(Successors.empty() && "Setting one successor when others exist.");
593 assert(Successor->getParent() == getParent() &&
594 "connected blocks must have the same parent");
595 appendSuccessor(Successor);
596 }
597
598 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
599 /// successors of this VPBlockBase. This VPBlockBase is not added as
600 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
601 /// successors.
602 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
603 assert(Successors.empty() && "Setting two successors when others exist.");
604 appendSuccessor(IfTrue);
605 appendSuccessor(IfFalse);
606 }
607
608 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
609 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
610 /// as successor of any VPBasicBlock in \p NewPreds.
612 assert(Predecessors.empty() && "Block predecessors already set.");
613 for (auto *Pred : NewPreds)
614 appendPredecessor(Pred);
615 }
616
617 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
618 /// This VPBlockBase must have no successors. This VPBlockBase is not added
619 /// as predecessor of any VPBasicBlock in \p NewSuccs.
621 assert(Successors.empty() && "Block successors already set.");
622 for (auto *Succ : NewSuccs)
623 appendSuccessor(Succ);
624 }
625
626 /// Remove all the predecessors of this block.
627 void clearPredecessors() { Predecessors.clear(); }
628
629 /// Remove all the successors of this block.
630 void clearSuccessors() { Successors.clear(); }
631
632 /// The method which generates the output IR that corresponds to this
633 /// VPBlockBase, thereby "executing" the VPlan.
634 virtual void execute(VPTransformState *State) = 0;
635
636 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
637 static void deleteCFG(VPBlockBase *Entry);
638
639 /// Return true if it is legal to hoist instructions into this block.
641 // There are currently no constraints that prevent an instruction from being
642 // hoisted into a VPBlockBase.
643 return true;
644 }
645
646 /// Replace all operands of VPUsers in the block with \p NewValue and also
647 /// replaces all uses of VPValues defined in the block with NewValue.
648 virtual void dropAllReferences(VPValue *NewValue) = 0;
649
650#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
651 void printAsOperand(raw_ostream &OS, bool PrintType) const {
652 OS << getName();
653 }
654
655 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
656 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
657 /// consecutive numbers.
658 ///
659 /// Note that the numbering is applied to the whole VPlan, so printing
660 /// individual blocks is consistent with the whole VPlan printing.
661 virtual void print(raw_ostream &O, const Twine &Indent,
662 VPSlotTracker &SlotTracker) const = 0;
663
664 /// Print plain-text dump of this VPBlockBase to \p O.
665 void print(raw_ostream &O) const {
667 print(O, "", SlotTracker);
668 }
669
670 /// Print the successors of this block to \p O, prefixing all lines with \p
671 /// Indent.
672 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
673
674 /// Dump this VPBlockBase to dbgs().
675 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
676#endif
677
678 /// Clone the current block and its recipes without updating the operands of
679 /// the cloned recipes, including all blocks in the single-entry single-exit
680 /// region for VPRegionBlocks.
681 virtual VPBlockBase *clone() = 0;
682};
683
684/// A value that is used outside the VPlan. The operand of the user needs to be
685/// added to the associated LCSSA phi node.
686class VPLiveOut : public VPUser {
687 PHINode *Phi;
688
689public:
691 : VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
692
693 static inline bool classof(const VPUser *U) {
694 return U->getVPUserID() == VPUser::VPUserID::LiveOut;
695 }
696
697 /// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
698 /// means we need to add the appropriate incoming value from the middle
699 /// block as exiting edges from the scalar epilogue loop (if present) are
700 /// already in place, and we exit the vector loop exclusively to the middle
701 /// block.
702 void fixPhi(VPlan &Plan, VPTransformState &State);
703
704 /// Returns true if the VPLiveOut uses scalars of operand \p Op.
705 bool usesScalars(const VPValue *Op) const override {
707 "Op must be an operand of the recipe");
708 return true;
709 }
710
711 PHINode *getPhi() const { return Phi; }
712
713#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
714 /// Print the VPLiveOut to \p O.
716#endif
717};
718
719/// VPRecipeBase is a base class modeling a sequence of one or more output IR
720/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
721/// and is responsible for deleting its defined values. Single-value
722/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
723/// VPRecipeBase and VPValue separately.
724class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
725 public VPDef,
726 public VPUser {
727 friend VPBasicBlock;
728 friend class VPBlockUtils;
729
730 /// Each VPRecipe belongs to a single VPBasicBlock.
731 VPBasicBlock *Parent = nullptr;
732
733 /// The debug location for the recipe.
734 DebugLoc DL;
735
736public:
738 DebugLoc DL = {})
740
741 template <typename IterT>
743 DebugLoc DL = {})
745 virtual ~VPRecipeBase() = default;
746
747 /// Clone the current recipe.
748 virtual VPRecipeBase *clone() = 0;
749
750 /// \return the VPBasicBlock which this VPRecipe belongs to.
751 VPBasicBlock *getParent() { return Parent; }
752 const VPBasicBlock *getParent() const { return Parent; }
753
754 /// The method which generates the output IR instructions that correspond to
755 /// this VPRecipe, thereby "executing" the VPlan.
756 virtual void execute(VPTransformState &State) = 0;
757
758 /// Insert an unlinked recipe into a basic block immediately before
759 /// the specified recipe.
760 void insertBefore(VPRecipeBase *InsertPos);
761 /// Insert an unlinked recipe into \p BB immediately before the insertion
762 /// point \p IP;
764
765 /// Insert an unlinked Recipe into a basic block immediately after
766 /// the specified Recipe.
767 void insertAfter(VPRecipeBase *InsertPos);
768
769 /// Unlink this recipe from its current VPBasicBlock and insert it into
770 /// the VPBasicBlock that MovePos lives in, right after MovePos.
771 void moveAfter(VPRecipeBase *MovePos);
772
773 /// Unlink this recipe and insert into BB before I.
774 ///
775 /// \pre I is a valid iterator into BB.
777
778 /// This method unlinks 'this' from the containing basic block, but does not
779 /// delete it.
780 void removeFromParent();
781
782 /// This method unlinks 'this' from the containing basic block and deletes it.
783 ///
784 /// \returns an iterator pointing to the element after the erased one
786
787 /// Method to support type inquiry through isa, cast, and dyn_cast.
788 static inline bool classof(const VPDef *D) {
789 // All VPDefs are also VPRecipeBases.
790 return true;
791 }
792
793 static inline bool classof(const VPUser *U) {
794 return U->getVPUserID() == VPUser::VPUserID::Recipe;
795 }
796
797 /// Returns true if the recipe may have side-effects.
798 bool mayHaveSideEffects() const;
799
800 /// Returns true for PHI-like recipes.
801 bool isPhi() const {
802 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
803 }
804
805 /// Returns true if the recipe may read from memory.
806 bool mayReadFromMemory() const;
807
808 /// Returns true if the recipe may write to memory.
809 bool mayWriteToMemory() const;
810
811 /// Returns true if the recipe may read from or write to memory.
812 bool mayReadOrWriteMemory() const {
814 }
815
816 /// Returns the debug location of the recipe.
817 DebugLoc getDebugLoc() const { return DL; }
818};
819
820// Helper macro to define common classof implementations for recipes.
// It expands to five static classof overloads, taking a VPDef, VPValue,
// VPUser, VPRecipeBase or VPSingleDefRecipe pointer, each of which compares
// the recipe's getVPDefID() against the VPDefID macro argument. The VPValue
// overload first looks up the defining recipe and the VPUser overload first
// dyn_casts to VPRecipeBase; both return false when no recipe is found.
821#define VP_CLASSOF_IMPL(VPDefID) \
822 static inline bool classof(const VPDef *D) { \
823 return D->getVPDefID() == VPDefID; \
824 } \
825 static inline bool classof(const VPValue *V) { \
826 auto *R = V->getDefiningRecipe(); \
827 return R && R->getVPDefID() == VPDefID; \
828 } \
829 static inline bool classof(const VPUser *U) { \
830 auto *R = dyn_cast<VPRecipeBase>(U); \
831 return R && R->getVPDefID() == VPDefID; \
832 } \
833 static inline bool classof(const VPRecipeBase *R) { \
834 return R->getVPDefID() == VPDefID; \
835 } \
836 static inline bool classof(const VPSingleDefRecipe *R) { \
837 return R->getVPDefID() == VPDefID; \
838 }
839
840/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
841/// more output IR instructions that define a single result VPValue.
842/// Note that VPRecipeBase must be inherited from before VPValue.
843class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
844public:
845 template <typename IterT>
846 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
847 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
848
849 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
850 DebugLoc DL = {})
851 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
852
853 template <typename IterT>
854 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
855 DebugLoc DL = {})
856 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
857
858 static inline bool classof(const VPRecipeBase *R) {
859 switch (R->getVPDefID()) {
860 case VPRecipeBase::VPDerivedIVSC:
861 case VPRecipeBase::VPEVLBasedIVPHISC:
862 case VPRecipeBase::VPExpandSCEVSC:
863 case VPRecipeBase::VPInstructionSC:
864 case VPRecipeBase::VPReductionSC:
865 case VPRecipeBase::VPReplicateSC:
866 case VPRecipeBase::VPScalarIVStepsSC:
867 case VPRecipeBase::VPVectorPointerSC:
868 case VPRecipeBase::VPWidenCallSC:
869 case VPRecipeBase::VPWidenCanonicalIVSC:
870 case VPRecipeBase::VPWidenCastSC:
871 case VPRecipeBase::VPWidenGEPSC:
872 case VPRecipeBase::VPWidenSC:
873 case VPRecipeBase::VPWidenSelectSC:
874 case VPRecipeBase::VPBlendSC:
875 case VPRecipeBase::VPPredInstPHISC:
876 case VPRecipeBase::VPCanonicalIVPHISC:
877 case VPRecipeBase::VPActiveLaneMaskPHISC:
878 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
879 case VPRecipeBase::VPWidenPHISC:
880 case VPRecipeBase::VPWidenIntOrFpInductionSC:
881 case VPRecipeBase::VPWidenPointerInductionSC:
882 case VPRecipeBase::VPReductionPHISC:
883 case VPRecipeBase::VPScalarCastSC:
884 return true;
885 case VPRecipeBase::VPInterleaveSC:
886 case VPRecipeBase::VPBranchOnMaskSC:
887 case VPRecipeBase::VPWidenLoadEVLSC:
888 case VPRecipeBase::VPWidenLoadSC:
889 case VPRecipeBase::VPWidenStoreEVLSC:
890 case VPRecipeBase::VPWidenStoreSC:
891 // TODO: Widened stores don't define a value, but widened loads do. Split
892 // the recipes to be able to make widened loads VPSingleDefRecipes.
893 return false;
894 }
895 llvm_unreachable("Unhandled VPDefID");
896 }
897
898 static inline bool classof(const VPUser *U) {
899 auto *R = dyn_cast<VPRecipeBase>(U);
900 return R && classof(R);
901 }
902
903 virtual VPSingleDefRecipe *clone() override = 0;
904
905 /// Returns the underlying instruction.
907 return cast<Instruction>(getUnderlyingValue());
908 }
910 return cast<Instruction>(getUnderlyingValue());
911 }
912};
913
914/// Class to record the LLVM IR flags associated with a recipe.
916 enum class OperationType : unsigned char {
917 Cmp,
918 OverflowingBinOp,
919 DisjointOp,
920 PossiblyExactOp,
921 GEPOp,
922 FPMathOp,
923 NonNegOp,
924 Other
925 };
926
927public:
928 struct WrapFlagsTy {
929 char HasNUW : 1;
930 char HasNSW : 1;
931
933 };
934
936 char IsDisjoint : 1;
938 };
939
940protected:
941 struct GEPFlagsTy {
942 char IsInBounds : 1;
944 };
945
946private:
947 struct ExactFlagsTy {
948 char IsExact : 1;
949 };
950 struct NonNegFlagsTy {
951 char NonNeg : 1;
952 };
953 struct FastMathFlagsTy {
954 char AllowReassoc : 1;
955 char NoNaNs : 1;
956 char NoInfs : 1;
957 char NoSignedZeros : 1;
958 char AllowReciprocal : 1;
959 char AllowContract : 1;
960 char ApproxFunc : 1;
961
962 FastMathFlagsTy(const FastMathFlags &FMF);
963 };
964
965 OperationType OpType;
966
967 union {
971 ExactFlagsTy ExactFlags;
973 NonNegFlagsTy NonNegFlags;
974 FastMathFlagsTy FMFs;
975 unsigned AllFlags;
976 };
977
978protected:
980 OpType = Other.OpType;
981 AllFlags = Other.AllFlags;
982 }
983
984public:
/// Construct a recipe that carries no IR flags: OpType is set to
/// OperationType::Other and the flag storage is zeroed through AllFlags.
/// \p SC, \p Operands and \p DL are forwarded unchanged to VPSingleDefRecipe.
985 template <typename IterT>
986 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
987 : VPSingleDefRecipe(SC, Operands, DL) {
988 OpType = OperationType::Other;
989 AllFlags = 0;
990 }
991
992 template <typename IterT>
993 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
995 if (auto *Op = dyn_cast<CmpInst>(&I)) {
996 OpType = OperationType::Cmp;
997 CmpPredicate = Op->getPredicate();
998 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
999 OpType = OperationType::DisjointOp;
1000 DisjointFlags.IsDisjoint = Op->isDisjoint();
1001 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1002 OpType = OperationType::OverflowingBinOp;
1003 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1004 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1005 OpType = OperationType::PossiblyExactOp;
1006 ExactFlags.IsExact = Op->isExact();
1007 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1008 OpType = OperationType::GEPOp;
1009 GEPFlags.IsInBounds = GEP->isInBounds();
1010 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1011 OpType = OperationType::NonNegOp;
1012 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1013 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1014 OpType = OperationType::FPMathOp;
1015 FMFs = Op->getFastMathFlags();
1016 } else {
1017 OpType = OperationType::Other;
1018 AllFlags = 0;
1019 }
1020 }
1021
/// Construct a recipe of kind OperationType::Cmp that stores the compare
/// predicate \p Pred. \p SC, \p Operands and \p DL are forwarded to
/// VPSingleDefRecipe.
1022 template <typename IterT>
1023 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1024 CmpInst::Predicate Pred, DebugLoc DL = {})
1025 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1026 CmpPredicate(Pred) {}
1027
1028 template <typename IterT>
1029 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1031 : VPSingleDefRecipe(SC, Operands, DL),
1032 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1033
/// Construct a recipe of kind OperationType::FPMathOp whose flag storage is
/// initialized from the fast-math flags \p FMFs (converted to the bit-field
/// representation FastMathFlagsTy). \p SC, \p Operands and \p DL are
/// forwarded to VPSingleDefRecipe.
1034 template <typename IterT>
1035 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1036 FastMathFlags FMFs, DebugLoc DL = {})
1037 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1038 FMFs(FMFs) {}
1039
1040 template <typename IterT>
1041 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1043 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1045
1046protected:
/// Construct a recipe of kind OperationType::GEPOp that stores \p GEPFlags.
/// This overload sits in a protected section, as does GEPFlagsTy itself.
/// \p SC, \p Operands and \p DL are forwarded to VPSingleDefRecipe.
1047 template <typename IterT>
1048 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1049 GEPFlagsTy GEPFlags, DebugLoc DL = {})
1050 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1051 GEPFlags(GEPFlags) {}
1052
1053public:
/// Type inquiry on a recipe: returns true exactly when the VPDef ID of \p R
/// is one of the six IDs compared below (VPInstructionSC, VPWidenSC,
/// VPWidenGEPSC, VPWidenCastSC, VPReplicateSC, VPVectorPointerSC).
1054 static inline bool classof(const VPRecipeBase *R) {
1055 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1056 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1057 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1058 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1059 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1060 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1061 }
1062
/// Type inquiry on a VPUser: \p U qualifies only if it is a VPRecipeBase for
/// which the VPRecipeBase overload of classof returns true.
1063 static inline bool classof(const VPUser *U) {
1064 auto *R = dyn_cast<VPRecipeBase>(U);
// R is null when U is not a VPRecipeBase; the result is then false.
1065 return R && classof(R);
1066 }
1067
1068 /// Drop all poison-generating flags.
1070 // NOTE: This needs to be kept in-sync with
1071 // Instruction::dropPoisonGeneratingFlags.
1072 switch (OpType) {
1073 case OperationType::OverflowingBinOp:
1074 WrapFlags.HasNUW = false;
1075 WrapFlags.HasNSW = false;
1076 break;
1077 case OperationType::DisjointOp:
1078 DisjointFlags.IsDisjoint = false;
1079 break;
1080 case OperationType::PossiblyExactOp:
1081 ExactFlags.IsExact = false;
1082 break;
1083 case OperationType::GEPOp:
1084 GEPFlags.IsInBounds = false;
1085 break;
1086 case OperationType::FPMathOp:
1087 FMFs.NoNaNs = false;
1088 FMFs.NoInfs = false;
1089 break;
1090 case OperationType::NonNegOp:
1091 NonNegFlags.NonNeg = false;
1092 break;
1093 case OperationType::Cmp:
1094 case OperationType::Other:
1095 break;
1096 }
1097 }
1098
1099 /// Set the IR flags for \p I.
1100 void setFlags(Instruction *I) const {
1101 switch (OpType) {
1102 case OperationType::OverflowingBinOp:
1103 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1104 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1105 break;
1106 case OperationType::DisjointOp:
1107 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1108 break;
1109 case OperationType::PossiblyExactOp:
1110 I->setIsExact(ExactFlags.IsExact);
1111 break;
1112 case OperationType::GEPOp:
1113 // TODO(gep_nowrap): Track the full GEPNoWrapFlags in VPlan.
1114 cast<GetElementPtrInst>(I)->setNoWrapFlags(
1117 break;
1118 case OperationType::FPMathOp:
1119 I->setHasAllowReassoc(FMFs.AllowReassoc);
1120 I->setHasNoNaNs(FMFs.NoNaNs);
1121 I->setHasNoInfs(FMFs.NoInfs);
1122 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1123 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1124 I->setHasAllowContract(FMFs.AllowContract);
1125 I->setHasApproxFunc(FMFs.ApproxFunc);
1126 break;
1127 case OperationType::NonNegOp:
1128 I->setNonNeg(NonNegFlags.NonNeg);
1129 break;
1130 case OperationType::Cmp:
1131 case OperationType::Other:
1132 break;
1133 }
1134 }
1135
1137 assert(OpType == OperationType::Cmp &&
1138 "recipe doesn't have a compare predicate");
1139 return CmpPredicate;
1140 }
1141
/// Returns the stored inbounds flag. Must only be called on recipes whose
/// OpType is OperationType::GEPOp; this is checked by the assertion.
1142 bool isInBounds() const {
1143 assert(OpType == OperationType::GEPOp &&
1144 "recipe doesn't have inbounds flag");
1145 return GEPFlags.IsInBounds;
1146 }
1147
1148 /// Returns true if the recipe has fast-math flags.
1149 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1150
1152
/// Returns the stored no-unsigned-wrap (NUW) flag. Must only be called on
/// recipes whose OpType is OperationType::OverflowingBinOp (asserted).
1153 bool hasNoUnsignedWrap() const {
1154 assert(OpType == OperationType::OverflowingBinOp &&
1155 "recipe doesn't have a NUW flag");
1156 return WrapFlags.HasNUW;
1157 }
1158
/// Returns the stored no-signed-wrap (NSW) flag. Must only be called on
/// recipes whose OpType is OperationType::OverflowingBinOp (asserted).
1159 bool hasNoSignedWrap() const {
1160 assert(OpType == OperationType::OverflowingBinOp &&
1161 "recipe doesn't have a NSW flag");
1162 return WrapFlags.HasNSW;
1163 }
1164
1165 bool isDisjoint() const {
1166 assert(OpType == OperationType::DisjointOp &&
1167 "recipe cannot have a disjoing flag");
1169 }
1170
1171#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1172 void printFlags(raw_ostream &O) const;
1173#endif
1174};
1175
1176/// This is a concrete Recipe that models a single VPlan-level instruction.
1177/// While as any Recipe it may generate a sequence of IR instructions when
1178/// executed, these instructions would always form a single-def expression as
1179/// the VPInstruction is also a single def-use vertex.
1181 friend class VPlanSlp;
1182
1183public:
1184 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1185 enum {
1187 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1188 // values of a first-order recurrence.
1195 // Increment the canonical IV separately for each unrolled part.
1200 // Takes the VPValue to extract from as first operand and the lane or part
1201 // to extract as second operand, counting from the end starting with 1 for
1202 // last. The second operand must be a positive constant and <= VF when
1203 // extracting from a vector or <= UF when extracting from an unrolled
1204 // scalar.
1206 LogicalAnd, // Non-poison propagating logical And.
1207 // Add an offset in bytes (second operand) to a base pointer (first
1208 // operand). Only generates scalar values (either for the first lane only or
1209 // for all lanes, depending on its uses).
1211 };
1212
1213private:
1214 typedef unsigned char OpcodeTy;
1215 OpcodeTy Opcode;
1216
1217 /// An optional name that can be used for the generated IR instruction.
1218 const std::string Name;
1219
1220 /// Returns true if this VPInstruction generates scalar values for all lanes.
1221 /// Most VPInstructions generate a single value per part, either vector or
1222 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1223 /// values per all lanes, stemming from an original ingredient. This method
1224 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1225 /// underlying ingredient.
1226 bool doesGeneratePerAllLanes() const;
1227
1228 /// Returns true if we can generate a scalar for the first lane only if
1229 /// needed.
1230 bool canGenerateScalarForFirstLane() const;
1231
1232 /// Utility methods serving execute(): generates a single instance of the
1233 /// modeled instruction for a given part. \returns the generated value for \p
1234 /// Part. In some cases an existing value is returned rather than a generated
1235 /// one.
1236 Value *generatePerPart(VPTransformState &State, unsigned Part);
1237
1238 /// Utility methods serving execute(): generates a scalar single instance of
1239 /// the modeled instruction for a given lane. \returns the scalar generated
1240 /// value for lane \p Lane.
1241 Value *generatePerLane(VPTransformState &State, const VPIteration &Lane);
1242
1243#if !defined(NDEBUG)
1244 /// Return true if the VPInstruction is a floating point math operation, i.e.
1245 /// has fast-math flags.
1246 bool isFPMathOp() const;
1247#endif
1248
1249public:
1251 const Twine &Name = "")
1252 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1253 Opcode(Opcode), Name(Name.str()) {}
1254
1255 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1256 DebugLoc DL = {}, const Twine &Name = "")
1258
1259 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1260 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1261
/// Create a VPInstruction with opcode \p Opcode over \p Operands that carries
/// the wrap flags in \p WrapFlags. \p Name is materialized into the recipe's
/// std::string member. Note that \p Opcode is an unsigned here but is stored
/// in the OpcodeTy (unsigned char) member.
1262 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1263 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1264 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1265 Opcode(Opcode), Name(Name.str()) {}
1266
/// Create a VPInstruction with opcode \p Opcode over \p Operands that carries
/// the disjoint flag in \p DisjointFlag. \p Name is materialized into the
/// recipe's std::string member.
1267 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1268 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1269 const Twine &Name = "")
1270 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1271 Opcode(Opcode), Name(Name.str()) {
// This overload is restricted to Instruction::Or; any other opcode trips the
// assertion in builds with assertions enabled.
1272 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1273 }
1274
1275 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1276 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1277
1278 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1279
1280 VPInstruction *clone() override {
1282 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1283 New->transferFlags(*this);
1284 return New;
1285 }
1286
1287 unsigned getOpcode() const { return Opcode; }
1288
1289 /// Generate the instruction.
1290 /// TODO: We currently execute only per-part unless a specific instance is
1291 /// provided.
1292 void execute(VPTransformState &State) override;
1293
1294#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1295 /// Print the VPInstruction to \p O.
1296 void print(raw_ostream &O, const Twine &Indent,
1297 VPSlotTracker &SlotTracker) const override;
1298
1299 /// Print the VPInstruction to dbgs() (for debugging).
1300 LLVM_DUMP_METHOD void dump() const;
1301#endif
1302
1303 /// Return true if this instruction may modify memory.
/// The answer is derived from the opcode alone: Store, Call, Invoke and
/// SLPStore are reported as possibly writing; every other opcode is not.
1304 bool mayWriteToMemory() const {
1305 // TODO: we can use attributes of the called function to rule out memory
1306 // modifications.
1307 return Opcode == Instruction::Store || Opcode == Instruction::Call ||
1308 Opcode == Instruction::Invoke || Opcode == SLPStore;
1309 }
1310
1311 bool hasResult() const {
1312 // CallInst may or may not have a result, depending on the called function.
1313 // Conservatively return calls have results for now.
1314 switch (getOpcode()) {
1315 case Instruction::Ret:
1316 case Instruction::Br:
1317 case Instruction::Store:
1318 case Instruction::Switch:
1319 case Instruction::IndirectBr:
1320 case Instruction::Resume:
1321 case Instruction::CatchRet:
1322 case Instruction::Unreachable:
1323 case Instruction::Fence:
1324 case Instruction::AtomicRMW:
1327 return false;
1328 default:
1329 return true;
1330 }
1331 }
1332
1333 /// Returns true if the recipe only uses the first lane of operand \p Op.
1334 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1335
1336 /// Returns true if the recipe only uses the first part of operand \p Op.
1337 bool onlyFirstPartUsed(const VPValue *Op) const override;
1338
1339 /// Returns true if this VPInstruction produces a scalar value from a vector,
1340 /// e.g. by performing a reduction or extracting a lane.
1341 bool isVectorToScalar() const;
1342};
1343
1344 /// VPWidenRecipe is a recipe for producing a vector-typed copy of its
1345/// ingredient. This recipe covers most of the traditional vectorization cases
1346/// where each ingredient transforms into a vectorized version of itself.
1348 unsigned Opcode;
1349
1350public:
1351 template <typename IterT>
1353 : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1354 Opcode(I.getOpcode()) {}
1355
1356 ~VPWidenRecipe() override = default;
1357
/// Returns a newly allocated VPWidenRecipe built from the same underlying
/// instruction and the same operands as this recipe.
1358 VPWidenRecipe *clone() override {
1359 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
// The constructor derives flags from the underlying instruction; copy this
// recipe's current flags over them. NOTE(review): presumably so that flags
// changed after construction carry over to the clone - confirm.
1360 R->transferFlags(*this);
1361 return R;
1362 }
1363
1364 VP_CLASSOF_IMPL(VPDef::VPWidenSC)
1365
1366 /// Produce widened copies of all Ingredients.
1367 void execute(VPTransformState &State) override;
1368
1369 unsigned getOpcode() const { return Opcode; }
1370
1371#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1372 /// Print the recipe.
1373 void print(raw_ostream &O, const Twine &Indent,
1374 VPSlotTracker &SlotTracker) const override;
1375#endif
1376};
1377
1378/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1380 /// Cast instruction opcode.
1381 Instruction::CastOps Opcode;
1382
1383 /// Result type for the cast.
1384 Type *ResultTy;
1385
1386public:
1388 CastInst &UI)
1389 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1390 ResultTy(ResultTy) {
1391 assert(UI.getOpcode() == Opcode &&
1392 "opcode of underlying cast doesn't match");
1393 assert(UI.getType() == ResultTy &&
1394 "result type of underlying cast doesn't match");
1395 }
1396
1398 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1399 ResultTy(ResultTy) {}
1400
1401 ~VPWidenCastRecipe() override = default;
1402
1404 if (auto *UV = getUnderlyingValue())
1405 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1406 *cast<CastInst>(UV));
1407
1408 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1409 }
1410
1411 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1412
1413 /// Produce widened copies of the cast.
1414 void execute(VPTransformState &State) override;
1415
1416#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1417 /// Print the recipe.
1418 void print(raw_ostream &O, const Twine &Indent,
1419 VPSlotTracker &SlotTracker) const override;
1420#endif
1421
1422 Instruction::CastOps getOpcode() const { return Opcode; }
1423
1424 /// Returns the result type of the cast.
1425 Type *getResultType() const { return ResultTy; }
1426};
1427
1428/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1430 Instruction::CastOps Opcode;
1431
1432 Type *ResultTy;
1433
1434 Value *generate(VPTransformState &State, unsigned Part);
1435
1436public:
1438 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1439 ResultTy(ResultTy) {}
1440
1441 ~VPScalarCastRecipe() override = default;
1442
1444 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1445 }
1446
1447 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1448
1449 void execute(VPTransformState &State) override;
1450
1451#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1452 void print(raw_ostream &O, const Twine &Indent,
1453 VPSlotTracker &SlotTracker) const override;
1454#endif
1455
1456 /// Returns the result type of the cast.
1457 Type *getResultType() const { return ResultTy; }
1458
1459 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1460 // At the moment, only uniform codegen is implemented.
1462 "Op must be an operand of the recipe");
1463 return true;
1464 }
1465};
1466
1467/// A recipe for widening Call instructions.
1469 /// ID of the vector intrinsic to call when widening the call. If set to
1470 /// Intrinsic::not_intrinsic, a library call will be used instead.
1471 Intrinsic::ID VectorIntrinsicID;
1472 /// If this recipe represents a library call, Variant stores a pointer to
1473 /// the chosen function. There is a 1:1 mapping between a given VF and the
1474 /// chosen vectorized variant, so there will be a different vplan for each
1475 /// VF with a valid variant.
1476 Function *Variant;
1477
1478public:
1479 template <typename IterT>
1481 Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
1482 Function *Variant = nullptr)
1483 : VPSingleDefRecipe(VPDef::VPWidenCallSC, CallArguments, UV, DL),
1484 VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
1485 assert(
1486 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1487 "last operand must be the called function");
1488 }
1489
1490 ~VPWidenCallRecipe() override = default;
1491
1494 VectorIntrinsicID, getDebugLoc(), Variant);
1495 }
1496
1497 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1498
1499 /// Produce a widened version of the call instruction.
1500 void execute(VPTransformState &State) override;
1501
1503 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1504 }
1505
1507 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1508 }
1510 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1511 }
1512
1513#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1514 /// Print the recipe.
1515 void print(raw_ostream &O, const Twine &Indent,
1516 VPSlotTracker &SlotTracker) const override;
1517#endif
1518};
1519
1520/// A recipe for widening select instructions.
1522 template <typename IterT>
1524 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1525 I.getDebugLoc()) {}
1526
1527 ~VPWidenSelectRecipe() override = default;
1528
1530 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1531 operands());
1532 }
1533
1534 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1535
1536 /// Produce a widened version of the select instruction.
1537 void execute(VPTransformState &State) override;
1538
1539#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1540 /// Print the recipe.
1541 void print(raw_ostream &O, const Twine &Indent,
1542 VPSlotTracker &SlotTracker) const override;
1543#endif
1544
/// Returns the condition of the select, which is operand 0 of this recipe.
1545 VPValue *getCond() const {
1546 return getOperand(0);
1547 }
1548
1549 bool isInvariantCond() const {
1551 }
1552};
1553
1554/// A recipe for handling GEP instructions.
1556 bool isPointerLoopInvariant() const {
1558 }
1559
1560 bool isIndexLoopInvariant(unsigned I) const {
1562 }
1563
/// Returns true if every operand of the recipe reports
/// isDefinedOutsideVectorRegions(). Trivially true when there are no
/// operands, since all_of over an empty range holds.
1564 bool areAllOperandsInvariant() const {
1565 return all_of(operands(), [](VPValue *Op) {
1566 return Op->isDefinedOutsideVectorRegions();
1567 });
1568 }
1569
1570public:
1571 template <typename IterT>
1573 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1574
1575 ~VPWidenGEPRecipe() override = default;
1576
1578 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1579 operands());
1580 }
1581
1582 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1583
1584 /// Generate the gep nodes.
1585 void execute(VPTransformState &State) override;
1586
1587#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1588 /// Print the recipe.
1589 void print(raw_ostream &O, const Twine &Indent,
1590 VPSlotTracker &SlotTracker) const override;
1591#endif
1592};
1593
1594 /// A recipe to compute the pointers for widened memory accesses of IndexedTy for
1595/// all parts. If IsReverse is true, compute pointers for accessing the input in
1596/// reverse order per part.
1598 Type *IndexedTy;
1599 bool IsReverse;
1600
1601public:
/// Create a vector-pointer recipe with \p Ptr as its single operand.
/// \p IsInBounds is wrapped in a GEPFlagsTy and handed to the
/// VPRecipeWithIRFlags base; \p IndexedTy and \p IsReverse are stored in the
/// members of the same name.
1602 VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse,
1603 bool IsInBounds, DebugLoc DL)
1604 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1605 GEPFlagsTy(IsInBounds), DL),
1606 IndexedTy(IndexedTy), IsReverse(IsReverse) {}
1607
1608 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1609
1610 void execute(VPTransformState &State) override;
1611
1612 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1614 "Op must be an operand of the recipe");
1615 return true;
1616 }
1617
1619 return new VPVectorPointerRecipe(getOperand(0), IndexedTy, IsReverse,
1620 isInBounds(), getDebugLoc());
1621 }
1622
1623#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1624 /// Print the recipe.
1625 void print(raw_ostream &O, const Twine &Indent,
1626 VPSlotTracker &SlotTracker) const override;
1627#endif
1628};
1629
1630/// A pure virtual base class for all recipes modeling header phis, including
1631/// phis for first order recurrences, pointer inductions and reductions. The
1632/// start value is the first operand of the recipe and the incoming value from
1633/// the backedge is the second operand.
1634///
1635/// Inductions are modeled using the following sub-classes:
1636/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
1637/// starting at a specified value (zero for the main vector loop, the resume
1638/// value for the epilogue vector loop) and stepping by 1. The induction
1639/// controls exiting of the vector loop by comparing against the vector trip
1640/// count. Produces a single scalar PHI for the induction value per
1641/// iteration.
1642/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
1643/// floating point inductions with arbitrary start and step values. Produces
1644/// a vector PHI per-part.
1645/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
1646/// value of an IV with different start and step values. Produces a single
1647/// scalar value per iteration
1648/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
1649/// canonical or derived induction.
1650/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
1651/// pointer induction. Produces either a vector PHI per-part or scalar values
1652/// per-lane based on the canonical induction.
1654protected:
/// Protected constructor for subclasses. The recipe is created with an empty
/// operand list; \p Start, when non-null, is then appended as the first
/// operand. \p VPDefID, \p UnderlyingInstr and \p DL are forwarded to
/// VPSingleDefRecipe.
1655 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
1656 VPValue *Start = nullptr, DebugLoc DL = {})
1657 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
// A null Start leaves the recipe without operands.
1658 if (Start)
1659 addOperand(Start);
1660 }
1661
1662public:
1663 ~VPHeaderPHIRecipe() override = default;
1664
1665 /// Method to support type inquiry through isa, cast, and dyn_cast.
1666 static inline bool classof(const VPRecipeBase *B) {
1667 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
1668 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
1669 }
/// Type inquiry on a VPValue: true if \p V has a defining recipe whose
/// VPDef ID lies in the inclusive range
/// [VPFirstHeaderPHISC, VPLastHeaderPHISC].
1670 static inline bool classof(const VPValue *V) {
1671 auto *B = V->getDefiningRecipe();
// B is null-checked first; a value without a defining recipe yields false.
1672 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
1673 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
1674 }
1675
1676 /// Generate the phi nodes.
1677 void execute(VPTransformState &State) override = 0;
1678
1679#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1680 /// Print the recipe.
1681 void print(raw_ostream &O, const Twine &Indent,
1682 VPSlotTracker &SlotTracker) const override = 0;
1683#endif
1684
1685 /// Returns the start value of the phi, if one is set.
1687 return getNumOperands() == 0 ? nullptr : getOperand(0);
1688 }
1690 return getNumOperands() == 0 ? nullptr : getOperand(0);
1691 }
1692
1693 /// Update the start value of the recipe.
1695
1696 /// Returns the incoming value from the loop backedge.
1698 return getOperand(1);
1699 }
1700
1701 /// Returns the backedge value as a recipe. The backedge value is guaranteed
1702 /// to be a recipe.
1705 }
1706};
1707
1708/// A recipe for handling phi nodes of integer and floating-point inductions,
1709/// producing their vector values.
1711 PHINode *IV;
1712 TruncInst *Trunc;
1713 const InductionDescriptor &IndDesc;
1714
1715public:
1717 const InductionDescriptor &IndDesc)
1718 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
1719 Trunc(nullptr), IndDesc(IndDesc) {
1720 addOperand(Step);
1721 }
1722
1724 const InductionDescriptor &IndDesc,
1725 TruncInst *Trunc)
1726 : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
1727 IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
1728 addOperand(Step);
1729 }
1730
1732
1735 getStepValue(), IndDesc, Trunc);
1736 }
1737
1738 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
1739
1740 /// Generate the vectorized and scalarized versions of the phi node as
1741 /// needed by their users.
1742 void execute(VPTransformState &State) override;
1743
1744#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1745 /// Print the recipe.
1746 void print(raw_ostream &O, const Twine &Indent,
1747 VPSlotTracker &SlotTracker) const override;
1748#endif
1749
1751 // TODO: All operands of base recipe must exist and be at same index in
1752 // derived recipe.
1754 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1755 }
1756
1758 // TODO: All operands of base recipe must exist and be at same index in
1759 // derived recipe.
1761 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
1762 }
1763
1764 /// Returns the step value of the induction.
1766 const VPValue *getStepValue() const { return getOperand(1); }
1767
1768 /// Returns the first defined value as TruncInst, if it is one or nullptr
1769 /// otherwise.
1770 TruncInst *getTruncInst() { return Trunc; }
1771 const TruncInst *getTruncInst() const { return Trunc; }
1772
1773 PHINode *getPHINode() { return IV; }
1774
1775 /// Returns the induction descriptor for the recipe.
1776 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1777
1778 /// Returns true if the induction is canonical, i.e. starting at 0 and
1779 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
1780 /// same type as the canonical induction.
1781 bool isCanonical() const;
1782
1783 /// Returns the scalar type of the induction.
1785 return Trunc ? Trunc->getType() : IV->getType();
1786 }
1787};
1788
1790 const InductionDescriptor &IndDesc;
1791
1792 bool IsScalarAfterVectorization;
1793
1794public:
1795 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
1796 /// Start.
1798 const InductionDescriptor &IndDesc,
1799 bool IsScalarAfterVectorization)
1800 : VPHeaderPHIRecipe(VPDef::VPWidenPointerInductionSC, Phi),
1801 IndDesc(IndDesc),
1802 IsScalarAfterVectorization(IsScalarAfterVectorization) {
1803 addOperand(Start);
1804 addOperand(Step);
1805 }
1806
1808
1811 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
1812 IndDesc, IsScalarAfterVectorization);
1813 }
1814
1815 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
1816
1817 /// Generate vector values for the pointer induction.
1818 void execute(VPTransformState &State) override;
1819
1820 /// Returns true if only scalar values will be generated.
1821 bool onlyScalarsGenerated(bool IsScalable);
1822
1823 /// Returns the induction descriptor for the recipe.
1824 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
1825
1826#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1827 /// Print the recipe.
1828 void print(raw_ostream &O, const Twine &Indent,
1829 VPSlotTracker &SlotTracker) const override;
1830#endif
1831};
1832
1833/// A recipe for handling phis that are widened in the vector loop.
1834/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
1835/// managed in the recipe directly.
1837 /// List of incoming blocks. Only used in the VPlan native path.
1838 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
1839
1840public:
1841 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
1842 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
1843 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
1844 if (Start)
1845 addOperand(Start);
1846 }
1847
1849 llvm_unreachable("cloning not implemented yet");
1850 }
1851
1852 ~VPWidenPHIRecipe() override = default;
1853
1854 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
1855
1856 /// Generate the phi/select nodes.
1857 void execute(VPTransformState &State) override;
1858
1859#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1860 /// Print the recipe.
1861 void print(raw_ostream &O, const Twine &Indent,
1862 VPSlotTracker &SlotTracker) const override;
1863#endif
1864
1865 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
1866 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
1867 addOperand(IncomingV);
1868 IncomingBlocks.push_back(IncomingBlock);
1869 }
1870
1871 /// Returns the \p I th incoming VPBasicBlock.
1872 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
1873
1874 /// Returns the \p I th incoming VPValue.
1875 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
1876};
1877
1878/// A recipe for handling first-order recurrence phis. The start value is the
1879/// first operand of the recipe and the incoming value from the backedge is the
1880/// second operand.
1883 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
1884
1885 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
1886
1888 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
1889 }
1890
1893 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
1894 }
1895
1896 void execute(VPTransformState &State) override;
1897
1898#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1899 /// Print the recipe.
1900 void print(raw_ostream &O, const Twine &Indent,
1901 VPSlotTracker &SlotTracker) const override;
1902#endif
1903};
1904
1905/// A recipe for handling reduction phis. The start value is the first operand
1906/// of the recipe and the incoming value from the backedge is the second
1907/// operand.
1909 /// Descriptor for the reduction.
1910 const RecurrenceDescriptor &RdxDesc;
1911
1912 /// The phi is part of an in-loop reduction.
1913 bool IsInLoop;
1914
1915 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
1916 bool IsOrdered;
1917
1918public:
1919 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
1920 /// RdxDesc.
1922 VPValue &Start, bool IsInLoop = false,
1923 bool IsOrdered = false)
1924 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
1925 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
1926 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
1927 }
1928
1929 ~VPReductionPHIRecipe() override = default;
1930
1932 auto *R =
1933 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
1934 *getOperand(0), IsInLoop, IsOrdered);
1935 R->addOperand(getBackedgeValue());
1936 return R;
1937 }
1938
1939 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
1940
1942 return R->getVPDefID() == VPDef::VPReductionPHISC;
1943 }
1944
1945 /// Generate the phi/select nodes.
1946 void execute(VPTransformState &State) override;
1947
1948#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1949 /// Print the recipe.
1950 void print(raw_ostream &O, const Twine &Indent,
1951 VPSlotTracker &SlotTracker) const override;
1952#endif
1953
1955 return RdxDesc;
1956 }
1957
1958 /// Returns true, if the phi is part of an ordered reduction.
1959 bool isOrdered() const { return IsOrdered; }
1960
1961 /// Returns true, if the phi is part of an in-loop reduction.
1962 bool isInLoop() const { return IsInLoop; }
1963};
1964
1965/// A recipe for vectorizing a phi-node as a sequence of mask-based select
1966/// instructions.
1968public:
1969 /// The blend operation is a User of the incoming values and of their
1970 /// respective masks, ordered [I0, I1, M1, I2, M2, ...]. Note that the first
1971 /// incoming value does not have a mask associated.
1973 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
1974 assert((Operands.size() + 1) % 2 == 0 &&
1975 "Expected an odd number of operands");
1976 }
1977
1978 VPBlendRecipe *clone() override {
1980 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
1981 }
1982
1983 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
1984
1985 /// Return the number of incoming values, taking into account that the first
1986 /// incoming value has no mask.
/// With operands ordered [I0, I1, M1, I2, M2, ...], N incoming values occupy
/// 2 * N - 1 operands, hence (getNumOperands() + 1) / 2.
1987 unsigned getNumIncomingValues() const { return (getNumOperands() + 1) / 2; }
1988
1989 /// Return incoming value number \p Idx.
/// The first incoming value is operand 0; every later incoming value \p Idx
/// is operand 2 * Idx - 1, i.e. the operand directly before its mask.
1990 VPValue *getIncomingValue(unsigned Idx) const {
1991 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - 1);
1992 }
1993
1994 /// Return mask number \p Idx, i.e. the mask paired with incoming value
/// \p Idx, which is stored at operand 2 * Idx. \p Idx must be greater than 0
/// because the first incoming value has no mask associated.
1995 VPValue *getMask(unsigned Idx) const {
1996 assert(Idx > 0 && "First index has no mask associated.");
1997 return getOperand(Idx * 2);
1998 }
1999
2000 /// Generate the phi/select nodes.
2001 void execute(VPTransformState &State) override;
2002
2003#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2004 /// Print the recipe.
2005 void print(raw_ostream &O, const Twine &Indent,
2006 VPSlotTracker &SlotTracker) const override;
2007#endif
2008
2009 /// Returns true if the recipe only uses the first lane of operand \p Op.
2010 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2012 "Op must be an operand of the recipe");
2013 // Recursing through Blend recipes only, must terminate at header phi's the
2014 // latest.
2015 return all_of(users(),
2016 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2017 }
2018};
2019
2020 /// VPInterleaveRecipe is a recipe for transforming an interleave group of loads
2021 /// or stores into one wide load/store and shuffles. The first operand of a
2022 /// VPInterleaveRecipe is the address, followed by the stored values, followed
2023 /// by an optional mask.
2026
2027 /// Indicates if the interleave group is in a conditional block and requires a
2028 /// mask.
2029 bool HasMask = false;
2030
2031 /// Indicates if gaps between members of the group need to be masked out or if
2032 /// unused gaps can be loaded speculatively.
2033 bool NeedsMaskForGaps = false;
2034
2035public:
2037 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2038 bool NeedsMaskForGaps)
2039 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2040 NeedsMaskForGaps(NeedsMaskForGaps) {
2041 for (unsigned i = 0; i < IG->getFactor(); ++i)
2042 if (Instruction *I = IG->getMember(i)) {
2043 if (I->getType()->isVoidTy())
2044 continue;
2045 new VPValue(I, this);
2046 }
2047
2048 for (auto *SV : StoredValues)
2049 addOperand(SV);
2050 if (Mask) {
2051 HasMask = true;
2052 addOperand(Mask);
2053 }
2054 }
2055 ~VPInterleaveRecipe() override = default;
2056
2058 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2059 NeedsMaskForGaps);
2060 }
2061
2062 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2063
2064 /// Return the address accessed by this recipe.
2065 VPValue *getAddr() const {
2066 return getOperand(0); // Address is the 1st, mandatory operand.
2067 }
2068
2069 /// Return the mask used by this recipe. Note that a full mask is represented
2070 /// by a nullptr.
2071 VPValue *getMask() const {
2072 // Mask is optional and, when present, always the last operand (it is added
// after the address and any stored values).
2073 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2074 }
2075
2076 /// Return the VPValues stored by this interleave group. If it is a load
2077 /// interleave group, return an empty ArrayRef.
2079 // The first operand is the address, followed by the stored values, followed
2080 // by an optional mask.
2083 }
2084
2085 /// Generate the wide load or store, and shuffles.
2086 void execute(VPTransformState &State) override;
2087
2088#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2089 /// Print the recipe.
2090 void print(raw_ostream &O, const Twine &Indent,
2091 VPSlotTracker &SlotTracker) const override;
2092#endif
2093
2095
2096 /// Returns the number of stored operands of this interleave group. Returns 0
2097 /// for load interleave groups.
2098 unsigned getNumStoreOperands() const {
// Every operand other than the address and, if present, the trailing mask
// is a stored value.
2099 return getNumOperands() - (HasMask ? 2 : 1);
2100 }
2101
2102 /// The recipe only uses the first lane of the address.
2103 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2105 "Op must be an operand of the recipe");
2106 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2107 }
2108};
2109
2110 /// A recipe to represent in-loop reduction operations, performing a reduction
2111 /// on a vector operand into a scalar value, and adding the result to a chain.
2112 /// The Operands are {ChainOp, VecOp, [Condition]}.
2114 /// The recurrence descriptor for the reduction in question.
2115 const RecurrenceDescriptor &RdxDesc;
2116 bool IsOrdered;
2117
2118public:
2120 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2121 bool IsOrdered)
2122 : VPSingleDefRecipe(VPDef::VPReductionSC,
2123 ArrayRef<VPValue *>({ChainOp, VecOp}), I),
2124 RdxDesc(R), IsOrdered(IsOrdered) {
2125 if (CondOp)
2126 addOperand(CondOp);
2127 }
2128
2129 ~VPReductionRecipe() override = default;
2130
2132 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2133 getVecOp(), getCondOp(), IsOrdered);
2134 }
2135
2136 VP_CLASSOF_IMPL(VPDef::VPReductionSC)
2137
2138 /// Generate the reduction in the loop
2139 void execute(VPTransformState &State) override;
2140
2141#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2142 /// Print the recipe.
2143 void print(raw_ostream &O, const Twine &Indent,
2144 VPSlotTracker &SlotTracker) const override;
2145#endif
2146
2147 /// The VPValue of the scalar Chain being accumulated.
2148 VPValue *getChainOp() const { return getOperand(0); }
2149 /// The VPValue of the vector value to be reduced.
2150 VPValue *getVecOp() const { return getOperand(1); }
2151 /// The VPValue of the condition for the block.
2153 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2154 }
2155};
2156
2157/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2158/// copies of the original scalar type, one per lane, instead of producing a
2159/// single copy of widened type for all lanes. If the instruction is known to be
2160/// uniform only one copy, per lane zero, will be generated.
2162 /// Indicator if only a single replica, for lane zero, is needed instead of one
/// replica per lane.
2163 bool IsUniform;
2164
2165 /// Indicator if the replicas are also predicated.
2166 bool IsPredicated;
2167
2168public:
2169 template <typename IterT>
2171 bool IsUniform, VPValue *Mask = nullptr)
2172 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2173 IsUniform(IsUniform), IsPredicated(Mask) {
2174 if (Mask)
2175 addOperand(Mask);
2176 }
2177
2178 ~VPReplicateRecipe() override = default;
2179
2181 auto *Copy =
2182 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2183 isPredicated() ? getMask() : nullptr);
2184 Copy->transferFlags(*this);
2185 return Copy;
2186 }
2187
2188 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2189
2190 /// Generate replicas of the desired Ingredient. Replicas will be generated
2191 /// for all parts and lanes unless a specific part and lane are specified in
2192 /// the \p State.
2193 void execute(VPTransformState &State) override;
2194
2195#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2196 /// Print the recipe.
2197 void print(raw_ostream &O, const Twine &Indent,
2198 VPSlotTracker &SlotTracker) const override;
2199#endif
2200
/// Returns true if the recipe was created as uniform, i.e. the IsUniform flag
/// passed to the constructor is set.
2201 bool isUniform() const { return IsUniform; }
2202
/// Returns true if the replicas are predicated, i.e. the recipe was
/// constructed with a non-null mask, which is then its last operand.
2203 bool isPredicated() const { return IsPredicated; }
2204
2205 /// Returns true if the recipe only uses the first lane of operand \p Op.
2206 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2208 "Op must be an operand of the recipe");
2209 return isUniform();
2210 }
2211
2212 /// Returns true if the recipe uses scalars of operand \p Op.
2213 bool usesScalars(const VPValue *Op) const override {
2215 "Op must be an operand of the recipe");
2216 return true;
2217 }
2218
2219 /// Returns true if the recipe is used by a widened recipe via an intervening
2220 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2221 /// in a vector.
2222 bool shouldPack() const;
2223
2224 /// Return the mask of a predicated VPReplicateRecipe.
2226 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2227 return getOperand(getNumOperands() - 1);
2228 }
2229
/// Returns the opcode of the underlying instruction being replicated.
2230 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2231};
2232
2233/// A recipe for generating conditional branches on the bits of a mask.
2235public:
2237 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2238 if (BlockInMask) // nullptr means all-one mask.
2239 addOperand(BlockInMask);
2240 }
2241
2243 return new VPBranchOnMaskRecipe(getOperand(0));
2244 }
2245
2246 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2247
2248 /// Generate the extraction of the appropriate bit from the block mask and the
2249 /// conditional branch.
2250 void execute(VPTransformState &State) override;
2251
2252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2253 /// Print the recipe to \p O: \p Indent, then "BRANCH-ON-MASK " followed by
/// either the mask operand (numbered through \p SlotTracker) or " All-One"
/// when the recipe has no mask operand.
2254 void print(raw_ostream &O, const Twine &Indent,
2255 VPSlotTracker &SlotTracker) const override {
2256 O << Indent << "BRANCH-ON-MASK ";
2257 if (VPValue *Mask = getMask())
2258 Mask->printAsOperand(O, SlotTracker);
2259 else
2260 O << " All-One";
2261 }
2262#endif
2263
2264 /// Return the mask used by this recipe. Note that a full mask is represented
2265 /// by a nullptr.
2266 VPValue *getMask() const {
2267 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2268 // Mask is optional.
2269 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2270 }
2271
2272 /// Returns true if the recipe uses scalars of operand \p Op.
2273 bool usesScalars(const VPValue *Op) const override {
2275 "Op must be an operand of the recipe");
2276 return true;
2277 }
2278};
2279
2280/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2281/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2282/// order to merge values that are set under such a branch and feed their uses.
2283/// The phi nodes can be scalar or vector depending on the users of the value.
2284/// This recipe works in concert with VPBranchOnMaskRecipe.
2286public:
2287 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2288 /// nodes after merging back from a Branch-on-Mask.
2290 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
2291 ~VPPredInstPHIRecipe() override = default;
2292
2294 return new VPPredInstPHIRecipe(getOperand(0));
2295 }
2296
2297 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2298
2299 /// Generates phi nodes for live-outs as needed to retain SSA form.
2300 void execute(VPTransformState &State) override;
2301
2302#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2303 /// Print the recipe.
2304 void print(raw_ostream &O, const Twine &Indent,
2305 VPSlotTracker &SlotTracker) const override;
2306#endif
2307
2308 /// Returns true if the recipe uses scalars of operand \p Op.
2309 bool usesScalars(const VPValue *Op) const override {
2311 "Op must be an operand of the recipe");
2312 return true;
2313 }
2314};
2315
2316/// A common base class for widening memory operations. An optional mask can be
2317/// provided as the last operand.
2319protected:
2321
2322 /// Whether the accessed addresses are consecutive.
2324
2325 /// Whether the consecutive accessed addresses are in reverse order.
2327
2328 /// Whether the memory access is masked.
2329 bool IsMasked = false;
2330
/// Attach \p Mask to the recipe by appending it as the last operand and
/// marking the recipe as masked. A null \p Mask leaves the recipe unmasked.
/// Asserts that no mask has been set before.
2331 void setMask(VPValue *Mask) {
2332 assert(!IsMasked && "cannot re-set mask");
2333 if (!Mask)
2334 return;
2335 addOperand(Mask);
2336 IsMasked = true;
2337 }
2338
2339 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2340 std::initializer_list<VPValue *> Operands,
2341 bool Consecutive, bool Reverse, DebugLoc DL)
2343 Reverse(Reverse) {
2344 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2345 }
2346
2347public:
2349 llvm_unreachable("cloning not supported");
2350 }
2351
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true if \p R is a widened load or store recipe, with or without EVL.
2352 static inline bool classof(const VPRecipeBase *R) {
2353 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2354 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2355 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2356 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2357 }
2358
/// Type inquiry for a VPUser: returns true only if \p U is a VPRecipeBase
/// that is accepted by the VPRecipeBase overload of classof.
2359 static inline bool classof(const VPUser *U) {
2360 auto *R = dyn_cast<VPRecipeBase>(U);
2361 return R && classof(R);
2362 }
2363
2364 /// Return whether the loaded-from / stored-to addresses are consecutive.
2365 bool isConsecutive() const { return Consecutive; }
2366
2367 /// Return whether the consecutive loaded/stored addresses are in reverse
2368 /// order.
2369 bool isReverse() const { return Reverse; }
2370
2371 /// Return the address accessed by this recipe.
2372 VPValue *getAddr() const { return getOperand(0); }
2373
2374 /// Returns true if the recipe is masked.
2375 bool isMasked() const { return IsMasked; }
2376
2377 /// Return the mask used by this recipe. Note that a full mask is represented
2378 /// by a nullptr.
2379 VPValue *getMask() const {
2380 // Mask is optional and therefore the last operand.
2381 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2382 }
2383
2384 /// Generate the wide load/store.
2385 void execute(VPTransformState &State) override {
2386 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2387 }
2388
2390};
2391
2392/// A recipe for widening load operations, using the address to load from and an
2393/// optional mask.
2394struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2396 bool Consecutive, bool Reverse, DebugLoc DL)
2397 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2398 Reverse, DL),
2399 VPValue(this, &Load) {
2400 setMask(Mask);
2401 }
2402
2404 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2406 getDebugLoc());
2407 }
2408
2409 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2410
2411 /// Generate a wide load or gather.
2412 void execute(VPTransformState &State) override;
2413
2414#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2415 /// Print the recipe.
2416 void print(raw_ostream &O, const Twine &Indent,
2417 VPSlotTracker &SlotTracker) const override;
2418#endif
2419
2420 /// Returns true if the recipe only uses the first lane of operand \p Op.
2421 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2423 "Op must be an operand of the recipe");
2424 // Widened, consecutive loads operations only demand the first lane of
2425 // their address.
2426 return Op == getAddr() && isConsecutive();
2427 }
2428};
2429
2430/// A recipe for widening load operations with vector-predication intrinsics,
2431/// using the address to load from, the explicit vector length and an optional
2432/// mask.
2433struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
2435 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L->getIngredient(),
2436 {L->getAddr(), EVL}, L->isConsecutive(),
2437 L->isReverse(), L->getDebugLoc()),
2438 VPValue(this, &getIngredient()) {
2439 setMask(Mask);
2440 }
2441
2442 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
2443
2444 /// Return the EVL operand.
2445 VPValue *getEVL() const { return getOperand(1); }
2446
2447 /// Generate the wide load or gather.
2448 void execute(VPTransformState &State) override;
2449
2450#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2451 /// Print the recipe.
2452 void print(raw_ostream &O, const Twine &Indent,
2453 VPSlotTracker &SlotTracker) const override;
2454#endif
2455
2456 /// Returns true if the recipe only uses the first lane of operand \p Op.
2457 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2459 "Op must be an operand of the recipe");
2460 // Widened loads only demand the first lane of EVL and consecutive loads
2461 // only demand the first lane of their address.
2462 return Op == getEVL() || (Op == getAddr() && isConsecutive());
2463 }
2464};
2465
2466/// A recipe for widening store operations, using the stored value, the address
2467/// to store to and an optional mask.
2470 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
2471 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
2473 setMask(Mask);
2474 }
2475
2477 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
2479 Reverse, getDebugLoc());
2480 }
2481
2482 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
2483
2484 /// Return the value stored by this recipe.
2485 VPValue *getStoredValue() const { return getOperand(1); }
2486
2487 /// Generate a wide store or scatter.
2488 void execute(VPTransformState &State) override;
2489
2490#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2491 /// Print the recipe.
2492 void print(raw_ostream &O, const Twine &Indent,
2493 VPSlotTracker &SlotTracker) const override;
2494#endif
2495
2496 /// Returns true if the recipe only uses the first lane of operand \p Op.
2497 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2499 "Op must be an operand of the recipe");
2500 // Widened, consecutive stores only demand the first lane of their address,
2501 // unless the same operand is also stored.
2502 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2503 }
2504};
2505
2506/// A recipe for widening store operations with vector-predication intrinsics,
2507/// using the value to store, the address to store to, the explicit vector
2508/// length and an optional mask.
2511 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S->getIngredient(),
2512 {S->getAddr(), S->getStoredValue(), EVL},
2513 S->isConsecutive(), S->isReverse(),
2514 S->getDebugLoc()) {
2515 setMask(Mask);
2516 }
2517
2518 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
2519
2520 /// Return the value stored by this recipe.
2521 VPValue *getStoredValue() const { return getOperand(1); }
2522
2523 /// Return the EVL operand.
2524 VPValue *getEVL() const { return getOperand(2); }
2525
2526 /// Generate the wide store or scatter.
2527 void execute(VPTransformState &State) override;
2528
2529#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2530 /// Print the recipe.
2531 void print(raw_ostream &O, const Twine &Indent,
2532 VPSlotTracker &SlotTracker) const override;
2533#endif
2534
2535 /// Returns true if the recipe only uses the first lane of operand \p Op.
2536 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2538 "Op must be an operand of the recipe");
2539 if (Op == getEVL()) {
2540 assert(getStoredValue() != Op && "unexpected store of EVL");
2541 return true;
2542 }
2543 // Widened, consecutive memory operations only demand the first lane of
2544 // their address, unless the same operand is also stored. The latter can
2545 // happen with opaque pointers.
2546 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
2547 }
2548};
2549
2550/// Recipe to expand a SCEV expression.
2552 const SCEV *Expr;
2553 ScalarEvolution &SE;
2554
2555public:
2557 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
2558
2559 ~VPExpandSCEVRecipe() override = default;
2560
2562 return new VPExpandSCEVRecipe(Expr, SE);
2563 }
2564
2565 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
2566
2567 /// Generate the expansion of the SCEV expression held by this recipe.
2568 void execute(VPTransformState &State) override;
2569
2570#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2571 /// Print the recipe.
2572 void print(raw_ostream &O, const Twine &Indent,
2573 VPSlotTracker &SlotTracker) const override;
2574#endif
2575
/// Returns the SCEV expression this recipe was created for.
2576 const SCEV *getSCEV() const { return Expr; }
2577};
2578
2579 /// Canonical scalar induction phi of the vector loop, starting at the specified
2580 /// start value (either 0 or the resume value when vectorizing the epilogue
2581 /// loop). VPWidenCanonicalIVRecipe represents the vector version of the
2582 /// canonical induction variable.
2584public:
2586 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
2587
2588 ~VPCanonicalIVPHIRecipe() override = default;
2589
2591 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
2592 R->addOperand(getBackedgeValue());
2593 return R;
2594 }
2595
2596 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
2597
2599 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
2600 }
2601
2602 /// Generate the canonical scalar induction phi of the vector loop.
2603 void execute(VPTransformState &State) override;
2604
2605#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2606 /// Print the recipe.
2607 void print(raw_ostream &O, const Twine &Indent,
2608 VPSlotTracker &SlotTracker) const override;
2609#endif
2610
2611 /// Returns the scalar type of the induction.
2613 return getStartValue()->getLiveInIRValue()->getType();
2614 }
2615
2616 /// Returns true if the recipe only uses the first lane of operand \p Op.
2617 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2619 "Op must be an operand of the recipe");
2620 return true;
2621 }
2622
2623 /// Returns true if the recipe only uses the first part of operand \p Op.
2624 bool onlyFirstPartUsed(const VPValue *Op) const override {
2626 "Op must be an operand of the recipe");
2627 return true;
2628 }
2629
2630 /// Check if the induction described by \p Kind, \p Start and \p Step is
2631 /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
2633 VPValue *Step) const;
2634};
2635
2636/// A recipe for generating the active lane mask for the vector loop that is
2637/// used to predicate the vector operations.
2638/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
2639/// remove VPActiveLaneMaskPHIRecipe.
2641public:
2643 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
2644 DL) {}
2645
2646 ~VPActiveLaneMaskPHIRecipe() override = default;
2647
2650 }
2651
2652 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
2653
2655 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
2656 }
2657
2658 /// Generate the active lane mask phi of the vector loop.
2659 void execute(VPTransformState &State) override;
2660
2661#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2662 /// Print the recipe.
2663 void print(raw_ostream &O, const Twine &Indent,
2664 VPSlotTracker &SlotTracker) const override;
2665#endif
2666};
2667
2668/// A recipe for generating the phi node for the current index of elements,
2669/// adjusted in accordance with EVL value. It starts at the start value of the
2670/// canonical induction and gets incremented by EVL in each iteration of the
2671/// vector loop.
2673public:
2675 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
2676
2677 ~VPEVLBasedIVPHIRecipe() override = default;
2678
2680 llvm_unreachable("cloning not implemented yet");
2681 }
2682
2683 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
2684
2686 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
2687 }
2688
2689 /// Generate phi for handling IV based on EVL over iterations correctly.
2690 /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
2691 void execute(VPTransformState &State) override;
2692
2693 /// Returns true if the recipe only uses the first lane of operand \p Op.
2694 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2696 "Op must be an operand of the recipe");
2697 return true;
2698 }
2699
2700#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2701 /// Print the recipe.
2702 void print(raw_ostream &O, const Twine &Indent,
2703 VPSlotTracker &SlotTracker) const override;
2704#endif
2705};
2706
2707/// A Recipe for widening the canonical induction variable of the vector loop.
2709public:
2711 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
2712
2713 ~VPWidenCanonicalIVRecipe() override = default;
2714
2716 return new VPWidenCanonicalIVRecipe(
2717 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
2718 }
2719
2720 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
2721
2722 /// Generate a canonical vector induction variable of the vector loop, with
2723 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
2724 /// step = <VF*UF, VF*UF, ..., VF*UF>.
2725 void execute(VPTransformState &State) override;
2726
2727#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2728 /// Print the recipe.
2729 void print(raw_ostream &O, const Twine &Indent,
2730 VPSlotTracker &SlotTracker) const override;
2731#endif
2732};
2733
2734 /// A recipe for converting the input value \p IV to the corresponding
2735 /// value of an IV with different start and step values, using Start + IV *
2736 /// Step.
2738 /// Kind of the induction.
2740 /// If not nullptr, the floating point induction binary operator. Must be set
2741 /// for floating point inductions.
2742 const FPMathOperator *FPBinOp;
2743
2744public:
2746 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
2748 IndDesc.getKind(),
2749 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
2750 Start, CanonicalIV, Step) {}
2751
2753 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
2754 VPValue *Step)
2755 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
2756 FPBinOp(FPBinOp) {}
2757
2758 ~VPDerivedIVRecipe() override = default;
2759
2761 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
2762 getStepValue());
2763 }
2764
2765 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
2766
2767 /// Generate the transformed value of the induction at offset StartValue (1.
2768 /// operand) + IV (2. operand) * StepValue (3. operand).
2769 void execute(VPTransformState &State) override;
2770
2771#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2772 /// Print the recipe.
2773 void print(raw_ostream &O, const Twine &Indent,
2774 VPSlotTracker &SlotTracker) const override;
2775#endif
2776
2778 return getStartValue()->getLiveInIRValue()->getType();
2779 }
2780
/// Returns the start value of the derived induction (1. operand).
2781 VPValue *getStartValue() const { return getOperand(0); }
/// Returns the step value of the derived induction (3. operand).
2782 VPValue *getStepValue() const { return getOperand(2); }
2783
2784 /// Returns true if the recipe only uses the first lane of operand \p Op.
2785 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2787 "Op must be an operand of the recipe");
2788 return true;
2789 }
2790};
2791
2792/// A recipe for handling phi nodes of integer and floating-point inductions,
2793/// producing their scalar values.
2795 Instruction::BinaryOps InductionOpcode;
2796
2797public:
2800 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
2801 ArrayRef<VPValue *>({IV, Step}), FMFs),
2802 InductionOpcode(Opcode) {}
2803
2805 VPValue *Step)
2807 IV, Step, IndDesc.getInductionOpcode(),
2808 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
2809 ? IndDesc.getInductionBinOp()->getFastMathFlags()
2810 : FastMathFlags()) {}
2811
2812 ~VPScalarIVStepsRecipe() override = default;
2813
2815 return new VPScalarIVStepsRecipe(
2816 getOperand(0), getOperand(1), InductionOpcode,
2818 }
2819
2820 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
2821
2822 /// Generate the scalarized versions of the phi node as needed by their users.
2823 void execute(VPTransformState &State) override;
2824
2825#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2826 /// Print the recipe.
2827 void print(raw_ostream &O, const Twine &Indent,
2828 VPSlotTracker &SlotTracker) const override;
2829#endif
2830
/// Returns the step value (2. operand; the 1. operand is the IV).
2831 VPValue *getStepValue() const { return getOperand(1); }
2832
2833 /// Returns true if the recipe only uses the first lane of operand \p Op.
2834 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2836 "Op must be an operand of the recipe");
2837 return true;
2838 }
2839};
2840
2841/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
2842/// holds a sequence of zero or more VPRecipe's each representing a sequence of
2843/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
2845public:
2847
2848protected:
2849 /// The VPRecipes held in the order of output instructions to generate.
2851
/// Construct an empty block with block ID \p BlockSC and name \p Name. This
/// protected overload lets a subclass pass its own block ID on to VPBlockBase.
2852 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
2853 : VPBlockBase(BlockSC, Name.str()) {}
2854
2855public:
/// Construct a VPBasicBlock named \p Name. If \p Recipe is non-null it is
/// appended to the block as its only initial recipe.
2856 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
2857 : VPBlockBase(VPBasicBlockSC, Name.str()) {
2858 if (Recipe)
2859 appendRecipe(Recipe);
2860 }
2861
/// Remove every recipe from the block, starting with the last one.
/// NOTE(review): presumably the recipe list owns its nodes, so popping also
/// destroys the recipes - confirm against the list type.
2862 ~VPBasicBlock() override {
2863 while (!Recipes.empty())
2864 Recipes.pop_back();
2865 }
2866
2867 /// Instruction iterators...
2872
2873 //===--------------------------------------------------------------------===//
2874 /// Recipe iterator methods
2875 ///
2876 inline iterator begin() { return Recipes.begin(); }
2877 inline const_iterator begin() const { return Recipes.begin(); }
2878 inline iterator end() { return Recipes.end(); }
2879 inline const_iterator end() const { return Recipes.end(); }
2880
2881 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
2882 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
2883 inline reverse_iterator rend() { return Recipes.rend(); }
2884 inline const_reverse_iterator rend() const { return Recipes.rend(); }
2885
2886 inline size_t size() const { return Recipes.size(); }
2887 inline bool empty() const { return Recipes.empty(); }
2888 inline const VPRecipeBase &front() const { return Recipes.front(); }
2889 inline VPRecipeBase &front() { return Recipes.front(); }
2890 inline const VPRecipeBase &back() const { return Recipes.back(); }
2891 inline VPRecipeBase &back() { return Recipes.back(); }
2892
2893 /// Returns a reference to the list of recipes.
2895
2896 /// Returns a pointer to a member of the recipe list.
2898 return &VPBasicBlock::Recipes;
2899 }
2900
2901 /// Method to support type inquiry through isa, cast, and dyn_cast.
/// Blocks with the VPIRBasicBlockSC ID are accepted as well, so a
/// VPIRBasicBlock is also treated as a VPBasicBlock.
2902 static inline bool classof(const VPBlockBase *V) {
2903 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
2904 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
2905 }
2906
/// Insert \p Recipe into this block at position \p InsertPt and make this
/// block its parent. \p Recipe must be non-null and must not already have a
/// parent block.
2907 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
2908 assert(Recipe && "No recipe to append.");
2909 assert(!Recipe->Parent && "Recipe already in VPlan");
2910 Recipe->Parent = this;
2911 Recipes.insert(InsertPt, Recipe);
2912 }
2913
2914 /// Augment the existing recipes of a VPBasicBlock with an additional
2915 /// \p Recipe as the last recipe.
2916 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
2917
2918 /// The method which generates the output IR instructions that correspond to
2919 /// this VPBasicBlock, thereby "executing" the VPlan.
2920 void execute(VPTransformState *State) override;
2921
2922 /// Return the position of the first non-phi node recipe in the block.
2924
2925 /// Returns an iterator range over the PHI-like recipes in the block.
2927 return make_range(begin(), getFirstNonPhi());
2928 }
2929
2930 void dropAllReferences(VPValue *NewValue) override;
2931
2932 /// Split current block at \p SplitAt by inserting a new block between the
2933 /// current block and its successors and moving all recipes starting at
2934 /// SplitAt to the new block. Returns the new block.
2935 VPBasicBlock *splitAt(iterator SplitAt);
2936
2938
2939#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2940 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
2941 /// SlotTracker is used to print unnamed VPValues using consecutive numbers.
2942 ///
2943 /// Note that the numbering is applied to the whole VPlan, so printing
2944 /// individual blocks is consistent with the whole VPlan printing.
2945 void print(raw_ostream &O, const Twine &Indent,
2946 VPSlotTracker &SlotTracker) const override;
2947 using VPBlockBase::print; // Get the print(raw_stream &O) version.
2948#endif
2949
2950 /// If the block has multiple successors, return the branch recipe terminating
2951 /// the block. If there is no successor or only a single one, return nullptr.
2953 const VPRecipeBase *getTerminator() const;
2954
2955 /// Returns true if the block is exiting its parent region.
2956 bool isExiting() const;
2957
2958 /// Clone the current block and its recipes, without updating the operands of
2959 /// the cloned recipes. The returned block is newly allocated, carries the
/// same name as this block and holds the cloned recipes in the same order.
2960 VPBasicBlock *clone() override {
2961 auto *NewBlock = new VPBasicBlock(getName());
2962 for (VPRecipeBase &R : *this)
2963 NewBlock->appendRecipe(R.clone());
2964 return NewBlock;
2965 }
2966
2967protected:
2968 /// Execute the recipes in the IR basic block \p BB.
2969 void executeRecipes(VPTransformState *State, BasicBlock *BB);
2970
2971private:
2972 /// Create an IR BasicBlock to hold the output instructions generated by this
2973 /// VPBasicBlock, and return it. Update the CFGState accordingly.
2974 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
2975};
2976
2977/// A special type of VPBasicBlock that wraps an existing IR basic block.
2978/// Recipes of the block get added before the first non-phi instruction in the
2979/// wrapped block.
2980/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
2981/// preheader block.
2983 BasicBlock *IRBB;
2984
2985public:
2987 : VPBasicBlock(VPIRBasicBlockSC,
2988 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
2989 IRBB(IRBB) {}
2990
2991 ~VPIRBasicBlock() override {}
2992
2993 static inline bool classof(const VPBlockBase *V) {
2994 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
2995 }
2996
2997 /// The method which generates the output IR instructions that correspond to
2998 /// this VPBasicBlock, thereby "executing" the VPlan.
2999 void execute(VPTransformState *State) override;
3000
3001 VPIRBasicBlock *clone() override {
3002 auto *NewBlock = new VPIRBasicBlock(IRBB);
3003 for (VPRecipeBase &R : Recipes)
3004 NewBlock->appendRecipe(R.clone());
3005 return NewBlock;
3006 }
3007
3008 BasicBlock *getIRBasicBlock() const { return IRBB; }
3009};
3010
3011/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3012/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3013/// A VPRegionBlock may indicate that its contents are to be replicated several
3014/// times. This is designed to support predicated scalarization, in which a
3015/// scalar if-then code structure needs to be generated VF * UF times. Having
3016/// this replication indicator helps to keep a single model for multiple
3017/// candidate VF's. The actual replication takes place only once the desired VF
3018/// and UF have been determined.
3020 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3021 VPBlockBase *Entry;
3022
3023 /// Hold the Single Exiting block of the SESE region modelled by the
3024 /// VPRegionBlock.
3025 VPBlockBase *Exiting;
3026
3027 /// An indicator whether this region is to generate multiple replicated
3028 /// instances of output IR corresponding to its VPBlockBases.
3029 bool IsReplicator;
3030
3031public:
3033 const std::string &Name = "", bool IsReplicator = false)
3034 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3035 IsReplicator(IsReplicator) {
3036 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3037 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3038 Entry->setParent(this);
3039 Exiting->setParent(this);
3040 }
3041 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3042 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3043 IsReplicator(IsReplicator) {}
3044
3045 ~VPRegionBlock() override {
3046 if (Entry) {
3047 VPValue DummyValue;
3048 Entry->dropAllReferences(&DummyValue);
3049 deleteCFG(Entry);
3050 }
3051 }
3052
3053 /// Method to support type inquiry through isa, cast, and dyn_cast.
3054 static inline bool classof(const VPBlockBase *V) {
3055 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3056 }
3057
3058 const VPBlockBase *getEntry() const { return Entry; }
3059 VPBlockBase *getEntry() { return Entry; }
3060
3061 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3062 /// EntryBlock must have no predecessors.
3063 void setEntry(VPBlockBase *EntryBlock) {
3064 assert(EntryBlock->getPredecessors().empty() &&
3065 "Entry block cannot have predecessors.");
3066 Entry = EntryBlock;
3067 EntryBlock->setParent(this);
3068 }
3069
3070 const VPBlockBase *getExiting() const { return Exiting; }
3071 VPBlockBase *getExiting() { return Exiting; }
3072
3073 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3074 /// ExitingBlock must have no successors.
3075 void setExiting(VPBlockBase *ExitingBlock) {
3076 assert(ExitingBlock->getSuccessors().empty() &&
3077 "Exit block cannot have successors.");
3078 Exiting = ExitingBlock;
3079 ExitingBlock->setParent(this);
3080 }
3081
3082 /// Returns the pre-header VPBasicBlock of the loop region.
3084 assert(!isReplicator() && "should only get pre-header of loop regions");
3086 }
3087
3088 /// An indicator whether this region is to generate multiple replicated
3089 /// instances of output IR corresponding to its VPBlockBases.
3090 bool isReplicator() const { return IsReplicator; }
3091
3092 /// The method which generates the output IR instructions that correspond to
3093 /// this VPRegionBlock, thereby "executing" the VPlan.
3094 void execute(VPTransformState *State) override;
3095
3096 void dropAllReferences(VPValue *NewValue) override;
3097
3098#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3099 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3100 /// \p Indent. \p SlotTracker is used to print unnamed VPValues using
3101 /// consecutive numbers.
3102 ///
3103 /// Note that the numbering is applied to the whole VPlan, so printing
3104 /// individual regions is consistent with the whole VPlan printing.
3105 void print(raw_ostream &O, const Twine &Indent,
3106 VPSlotTracker &SlotTracker) const override;
3107 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
3108#endif
3109
3110 /// Clone all blocks in the single-entry single-exit region of the block and
3111 /// their recipes without updating the operands of the cloned recipes.
3112 VPRegionBlock *clone() override;
3113};
3114
3115/// VPlan models a candidate for vectorization, encoding various decisions taken
3116/// to produce efficient output IR, including which branches, basic-blocks and
3117/// output IR instructions to generate, and their cost. VPlan holds a
3118/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3119/// VPBasicBlock.
3120class VPlan {
3121 friend class VPlanPrinter;
3122 friend class VPSlotTracker;
3123
3124 /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
3125 /// preheader of the vector loop.
3126 VPBasicBlock *Entry;
3127
3128 /// VPBasicBlock corresponding to the original preheader. Used to place
3129 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3130 /// rest of VPlan execution.
3131 VPBasicBlock *Preheader;
3132
3133 /// Holds the VFs applicable to this VPlan.
3135
3136 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3137 /// any UF.
3139
3140 /// Holds the name of the VPlan, for printing.
3141 std::string Name;
3142
3143 /// Represents the trip count of the original loop, for folding
3144 /// the tail.
3145 VPValue *TripCount = nullptr;
3146
3147 /// Represents the backedge taken count of the original loop, for folding
3148 /// the tail. It equals TripCount - 1.
3149 VPValue *BackedgeTakenCount = nullptr;
3150
3151 /// Represents the vector trip count.
3152 VPValue VectorTripCount;
3153
3154 /// Represents the loop-invariant VF * UF of the vector loop region.
3155 VPValue VFxUF;
3156
3157 /// Holds a mapping between Values and their corresponding VPValue inside
3158 /// VPlan.
3159 Value2VPValueTy Value2VPValue;
3160
3161 /// Contains all the external definitions created for this VPlan. External
3162 /// definitions are VPValues that hold a pointer to their underlying IR.
3163 SmallVector<VPValue *, 16> VPLiveInsToFree;
3164
3165 /// Values used outside the plan. It contains live-outs that need fixing. Any
3166 /// live-out that is fixed outside VPlan needs to be removed. The remaining
3167 /// live-outs are fixed via VPLiveOut::fixPhi.
3169
3170 /// Mapping from SCEVs to the VPValues representing their expansions.
3171 /// NOTE: This mapping is temporary and will be removed once all users have
3172 /// been modeled in VPlan directly.
3173 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3174
3175public:
3176 /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
3177 /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
3178 /// be disconnected, as the bypass blocks between them are not yet modeled in
3179 /// VPlan.
3180 VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
3181 : VPlan(Preheader, Entry) {
3182 TripCount = TC;
3183 }
3184
3185 /// Construct a VPlan with original preheader \p Preheader and \p Entry to
3186 /// the plan. At the moment, \p Preheader and \p Entry need to be
3187 /// disconnected, as the bypass blocks between them are not yet modeled in
3188 /// VPlan.
3189 VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
3190 : Entry(Entry), Preheader(Preheader) {
3191 Entry->setPlan(this);
3192 Preheader->setPlan(this);
3193 assert(Preheader->getNumSuccessors() == 0 &&
3194 Preheader->getNumPredecessors() == 0 &&
3195 "preheader must be disconnected");
3196 }
3197
3198 ~VPlan();
3199
3200 /// Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping
3201 /// original scalar pre-header \p PH) which contains SCEV expansions that need
3202 /// to happen before the CFG is modified; a VPBasicBlock for the vector
3203 /// pre-header, followed by a region for the vector loop, followed by the
3204 /// middle VPBasicBlock.
3205 static VPlanPtr createInitialVPlan(const SCEV *TripCount,
3206 ScalarEvolution &PSE, BasicBlock *PH);
3207
3208 /// Prepare the plan for execution, setting up the required live-in values.
3209 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3210 Value *CanonicalIVStartValue, VPTransformState &State);
3211
3212 /// Generate the IR code for this VPlan.
3213 void execute(VPTransformState *State);
3214
3215 VPBasicBlock *getEntry() { return Entry; }
3216 const VPBasicBlock *getEntry() const { return Entry; }
3217
3218 /// The trip count of the original loop.
3220 assert(TripCount && "trip count needs to be set before accessing it");
3221 return TripCount;
3222 }
3223
3224 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3225 /// the original trip count have been replaced.
3226 void resetTripCount(VPValue *NewTripCount) {
3227 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3228 "TripCount always must be set");
3229 TripCount = NewTripCount;
3230 }
3231
3232 /// The backedge taken count of the original loop.
3234 if (!BackedgeTakenCount)
3235 BackedgeTakenCount = new VPValue();
3236 return BackedgeTakenCount;
3237 }
3238
3239 /// The vector trip count.
3240 VPValue &getVectorTripCount() { return VectorTripCount; }
3241
3242 /// Returns VF * UF of the vector loop region.
3243 VPValue &getVFxUF() { return VFxUF; }
3244
3245 void addVF(ElementCount VF) { VFs.insert(VF); }
3246
3248 assert(hasVF(VF) && "Cannot set VF not already in plan");
3249 VFs.clear();
3250 VFs.insert(VF);
3251 }
3252
3253 bool hasVF(ElementCount VF) { return VFs.count(VF); }
3255 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3256 }
3257
3258 /// Returns an iterator range over all VFs of the plan.
3261 return {VFs.begin(), VFs.end()};
3262 }
3263
3264 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3265
3266 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3267
3268 void setUF(unsigned UF) {
3269 assert(hasUF(UF) && "Cannot set the UF not already in plan");
3270 UFs.clear();
3271 UFs.insert(UF);
3272 }
3273
3274 /// Return a string with the name of the plan and the applicable VFs and UFs.
3275 std::string getName() const;
3276
3277 void setName(const Twine &newName) { Name = newName.str(); }
3278
3279 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3280 /// yet) for \p V.
3282 assert(V && "Trying to get or add the VPValue of a null Value");
3283 if (!Value2VPValue.count(V)) {
3284 VPValue *VPV = new VPValue(V);
3285 VPLiveInsToFree.push_back(VPV);
3286 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3287 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3288 Value2VPValue[V] = VPV;
3289 }
3290
3291 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3292 assert(Value2VPValue[V]->isLiveIn() &&
3293 "Only live-ins should be in mapping");
3294 return Value2VPValue[V];
3295 }
3296
3297 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3298 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3299
3300#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3301 /// Print the live-ins of this VPlan to \p O.
3302 void printLiveIns(raw_ostream &O) const;
3303
3304 /// Print this VPlan to \p O.
3305 void print(raw_ostream &O) const;
3306
3307 /// Print this VPlan in DOT format to \p O.
3308 void printDOT(raw_ostream &O) const;
3309
3310 /// Dump the plan to stderr (for debugging).
3311 LLVM_DUMP_METHOD void dump() const;
3312#endif
3313
3314 /// Returns the VPRegionBlock of the vector loop.
3316 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3317 }
3319 return cast<VPRegionBlock>(getEntry()->getSingleSuccessor());
3320 }
3321
3322 /// Returns the canonical induction recipe of the vector loop.
3325 if (EntryVPBB->empty()) {
3326 // VPlan native path.
3327 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
3328 }
3329 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
3330 }
3331
3332 void addLiveOut(PHINode *PN, VPValue *V);
3333
3335 delete LiveOuts[PN];
3336 LiveOuts.erase(PN);
3337 }
3338
3340 return LiveOuts;
3341 }
3342
3343 VPValue *getSCEVExpansion(const SCEV *S) const {
3344 return SCEVToExpansion.lookup(S);
3345 }
3346
3347 void addSCEVExpansion(const SCEV *S, VPValue *V) {
3348 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
3349 SCEVToExpansion[S] = V;
3350 }
3351
3352 /// \return The block corresponding to the original preheader.
3353 VPBasicBlock *getPreheader() { return Preheader; }
3354 const VPBasicBlock *getPreheader() const { return Preheader; }
3355
3356 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
3357 /// recipes to refer to the clones, and return it.
3358 VPlan *duplicate();
3359};
3360
3361#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3362/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
3363/// indented and follows the dot format.
3365 raw_ostream &OS;
3366 const VPlan &Plan;
3367 unsigned Depth = 0;
3368 unsigned TabWidth = 2;
3369 std::string Indent;
3370 unsigned BID = 0;
3372
3374
3375 /// Handle indentation.
3376 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
3377
3378 /// Print a given \p Block of the Plan.
3379 void dumpBlock(const VPBlockBase *Block);
3380
3381 /// Print the information related to the CFG edges going out of a given
3382 /// \p Block, followed by printing the successor blocks themselves.
3383 void dumpEdges(const VPBlockBase *Block);
3384
3385 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
3386 /// its successor blocks.
3387 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
3388
3389 /// Print a given \p Region of the Plan.
3390 void dumpRegion(const VPRegionBlock *Region);
3391
3392 unsigned getOrCreateBID(const VPBlockBase *Block) {
3393 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
3394 }
3395
3396 Twine getOrCreateName(const VPBlockBase *Block);
3397
3398 Twine getUID(const VPBlockBase *Block);
3399
3400 /// Print the information related to a CFG edge between two VPBlockBases.
3401 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
3402 const Twine &Label);
3403
3404public:
3406 : OS(O), Plan(P), SlotTracker(&P) {}
3407
3408 LLVM_DUMP_METHOD void dump();
3409};
3410
3412 const Value *V;
3413
3414 VPlanIngredient(const Value *V) : V(V) {}
3415
3416 void print(raw_ostream &O) const;
3417};
3418
3420 I.print(OS);
3421 return OS;
3422}
3423
3425 Plan.print(OS);
3426 return OS;
3427}
3428#endif
3429
3430//===----------------------------------------------------------------------===//
3431// VPlan Utilities
3432//===----------------------------------------------------------------------===//
3433
3434/// Class that provides utilities for VPBlockBases in VPlan.
3436public:
3437 VPBlockUtils() = delete;
3438
3439 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
3440 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
3441 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
3442 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
3443 /// have neither successors nor predecessors.
3444 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
3445 assert(NewBlock->getSuccessors().empty() &&
3446 NewBlock->getPredecessors().empty() &&
3447 "Can't insert new block with predecessors or successors.");
3448 NewBlock->setParent(BlockPtr->getParent());
3449 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
3450 for (VPBlockBase *Succ : Succs) {
3451 disconnectBlocks(BlockPtr, Succ);
3452 connectBlocks(NewBlock, Succ);
3453 }
3454 connectBlocks(BlockPtr, NewBlock);
3455 }
3456
3457 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
3458 /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
3459 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
3460 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
3461 /// and \p IfTrue and \p IfFalse must have neither successors nor
3462 /// predecessors.
3463 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
3464 VPBlockBase *BlockPtr) {
3465 assert(IfTrue->getSuccessors().empty() &&
3466 "Can't insert IfTrue with successors.");
3467 assert(IfFalse->getSuccessors().empty() &&
3468 "Can't insert IfFalse with successors.");
3469 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
3470 IfTrue->setPredecessors({BlockPtr});
3471 IfFalse->setPredecessors({BlockPtr});
3472 IfTrue->setParent(BlockPtr->getParent());
3473 IfFalse->setParent(BlockPtr->getParent());
3474 }
3475
3476 /// Connect VPBlockBases \p From and \p To bi-directionally. Append \p To to
3477 /// the successors of \p From and \p From to the predecessors of \p To. Both
3478 /// VPBlockBases must have the same parent, which can be null. Both
3479 /// VPBlockBases can be already connected to other VPBlockBases.
3481 assert((From->getParent() == To->getParent()) &&
3482 "Can't connect two block with different parents");
3483 assert(From->getNumSuccessors() < 2 &&
3484 "Blocks can't have more than two successors.");
3485 From->appendSuccessor(To);
3486 To->appendPredecessor(From);
3487 }
3488
3489 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
3490 /// from the successors of \p From and \p From from the predecessors of \p To.
3492 assert(To && "Successor to disconnect is null.");
3493 From->removeSuccessor(To);
3494 To->removePredecessor(From);
3495 }
3496
3497 /// Return an iterator range over \p Range which only includes \p BlockTy
3498 /// blocks. The accesses are casted to \p BlockTy.
3499 template <typename BlockTy, typename T>
3500 static auto blocksOnly(const T &Range) {
3501 // Create BaseTy with correct const-ness based on BlockTy.
3502 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
3503 const VPBlockBase, VPBlockBase>;
3504
3505 // We need to first create an iterator range over (const) BlockTy & instead
3506 // of (const) BlockTy * for filter_range to work properly.
3507 auto Mapped =
3508 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
3510 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
3511 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
3512 return cast<BlockTy>(&Block);
3513 });
3514 }
3515};
3516
3519 InterleaveGroupMap;
3520
3521 /// Type for mapping of instruction based interleave groups to VPInstruction
3522 /// interleave groups
3525
3526 /// Recursively traverse \p Region and populate VPlan based interleave groups
3527 /// based on \p IAI.
3528 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
3530 /// Recursively traverse \p Block and populate VPlan based interleave groups
3531 /// based on \p IAI.
3532 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
3534
3535public:
3537
3540 // Avoid releasing a pointer twice.
3541 for (auto &I : InterleaveGroupMap)
3542 DelSet.insert(I.second);
3543 for (auto *Ptr : DelSet)
3544 delete Ptr;
3545 }
3546
3547 /// Get the interleave group that \p Instr belongs to.
3548 ///
3549 /// \returns nullptr if doesn't have such group.
3552 return InterleaveGroupMap.lookup(Instr);
3553 }
3554};
3555
3556/// Class that maps (parts of) an existing VPlan to trees of combined
3557/// VPInstructions.
3559 enum class OpMode { Failed, Load, Opcode };
3560
3561 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
3562 /// DenseMap keys.
3563 struct BundleDenseMapInfo {
3564 static SmallVector<VPValue *, 4> getEmptyKey() {
3565 return {reinterpret_cast<VPValue *>(-1)};
3566 }
3567
3568 static SmallVector<VPValue *, 4> getTombstoneKey() {
3569 return {reinterpret_cast<VPValue *>(-2)};
3570 }
3571
3572 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
3573 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
3574 }
3575
3576 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
3578 return LHS == RHS;
3579 }
3580 };
3581
3582 /// Mapping of values in the original VPlan to a combined VPInstruction.
3584 BundleToCombined;
3585
3587
3588 /// Basic block to operate on. For now, only instructions in a single BB are
3589 /// considered.
3590 const VPBasicBlock &BB;
3591
3592 /// Indicates whether we managed to combine all visited instructions or not.
3593 bool CompletelySLP = true;
3594
3595 /// Width of the widest combined bundle in bits.
3596 unsigned WidestBundleBits = 0;
3597
3598 using MultiNodeOpTy =
3599 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
3600
3601 // Input operand bundles for the current multi node. Each multi node operand
3602 // bundle contains values not matching the multi node's opcode. They will
3603 // be reordered in reorderMultiNodeOps, once we completed building a
3604 // multi node.
3605 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
3606
3607 /// Indicates whether we are building a multi node currently.
3608 bool MultiNodeActive = false;
3609
3610 /// Check if we can vectorize Operands together.
3611 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
3612
3613 /// Add combined instruction \p New for the bundle \p Operands.
3614 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
3615
3616 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
3617 VPInstruction *markFailed();
3618
3619 /// Reorder operands in the multi node to maximize sequential memory access
3620 /// and commutative operations.
3621 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
3622
3623 /// Choose the best candidate to use for the lane after \p Last. The set of
3624 /// candidates to choose from are values with an opcode matching \p Last's
3625 /// or loads consecutive to \p Last.
3626 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
3627 SmallPtrSetImpl<VPValue *> &Candidates,
3629
3630#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3631 /// Print bundle \p Values to dbgs().
3632 void dumpBundle(ArrayRef<VPValue *> Values);
3633#endif
3634
3635public:
3636 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
3637
3638 ~VPlanSlp() = default;
3639
3640 /// Tries to build an SLP tree rooted at \p Operands and returns a
3641 /// VPInstruction combining \p Operands, if they can be combined.
3643
3644 /// Return the width of the widest combined bundle in bits.
3645 unsigned getWidestBundleBits() const { return WidestBundleBits; }
3646
3647 /// Return true if all visited instructions can be combined.
3648 bool isCompletelySLP() const { return CompletelySLP; }
3649};
3650
3651namespace vputils {
3652
3653/// Returns true if only the first lane of \p Def is used.
3654bool onlyFirstLaneUsed(const VPValue *Def);
3655
3656/// Returns true if only the first part of \p Def is used.
3657bool onlyFirstPartUsed(const VPValue *Def);
3658
3659/// Get or create a VPValue that corresponds to the expansion of \p Expr. If \p
3660/// Expr is a SCEVConstant or SCEVUnknown, return a VPValue wrapping the live-in
3661/// value. Otherwise return a VPExpandSCEVRecipe to expand \p Expr. If \p Plan's
3662/// pre-header already contains a recipe expanding \p Expr, return it. If not,
3663/// create a new one.
3665 ScalarEvolution &SE);
3666
3667/// Returns true if \p VPV is uniform after vectorization.
3669 // A value defined outside the vector region must be uniform after
3670 // vectorization inside a vector region.
3672 return true;
3673 VPRecipeBase *Def = VPV->getDefiningRecipe();
3674 assert(Def && "Must have definition for value defined inside vector region");
3675 if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
3676 return Rep->isUniform();
3677 if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
3678 return all_of(GEP->operands(), isUniformAfterVectorization);
3679 if (auto *VPI = dyn_cast<VPInstruction>(Def))
3680 return VPI->isVectorToScalar();
3681 return false;
3682}
3683
3684/// Return true if \p V is a header mask in \p Plan.
3685bool isHeaderMask(VPValue *V, VPlan &Plan);
3686
3687} // end namespace vputils
3688
3689} // end namespace llvm
3690
3691#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:391
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:537
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1293
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
This file implements a map that provides insertion order iteration.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:821
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:694
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:202
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:319
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
static GEPNoWrapFlags inBounds()
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:914
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:92
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:453
uint32_t getFactor() const
Definition: VectorUtils.h:469
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:523
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:595
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:173
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:696
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition: SmallPtrSet.h:479
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:289
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:112
ElementCount operator*() const
Definition: VPlan.h:120
iterator & operator++()
Definition: VPlan.h:122
iterator(ElementCount VF)
Definition: VPlan.h:116
bool operator==(const iterator &Other) const
Definition: VPlan.h:118
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:2640
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2648
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2654
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:2642
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:2844
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:2869
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:2916
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:2960
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:2871
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:2868
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:476
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:2894
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:2852
iterator end()
Definition: VPlan.h:2878
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:2856
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:2876
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:2870
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:2926
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:211
~VPBasicBlock() override
Definition: VPlan.h:2862
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:568
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:520
const_reverse_iterator rbegin() const
Definition: VPlan.h:2882
reverse_iterator rend()
Definition: VPlan.h:2883
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:543
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:2850
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:530
VPRecipeBase & back()
Definition: VPlan.h:2891
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:635
const VPRecipeBase & front() const
Definition: VPlan.h:2888
const_iterator begin() const
Definition: VPlan.h:2877
VPRecipeBase & front()
Definition: VPlan.h:2889
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:618
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:606
const VPRecipeBase & back() const
Definition: VPlan.h:2890
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:2907
bool empty() const
Definition: VPlan.h:2887
const_iterator end() const
Definition: VPlan.h:2879
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2902
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:2897
reverse_iterator rbegin()
Definition: VPlan.h:2881
size_t size() const
Definition: VPlan.h:2886
const_reverse_iterator rend() const
Definition: VPlan.h:2884
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:1967
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:1972
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2010
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:1990
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:1995
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account that the first incoming value has no mask.
Definition: VPlan.h:1987
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1978
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:425
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:620
VPRegionBlock * getParent()
Definition: VPlan.h:497
VPBlocksTy & getPredecessors()
Definition: VPlan.h:528
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:176
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:675
void setName(const Twine &newName)
Definition: VPlan.h:490
size_t getNumSuccessors() const
Definition: VPlan.h:542
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:525
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:623
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:640
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:665
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:578
size_t getNumPredecessors() const
Definition: VPlan.h:543
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:611
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:198
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:527
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:206
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:482
VPlan * getPlan()
Definition: VPlan.cpp:149
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:168
const VPRegionBlock * getParent() const
Definition: VPlan.h:498
void printAsOperand(raw_ostream &OS, bool PrintType) const
Definition: VPlan.h:651
const std::string & getName() const
Definition: VPlan.h:488
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:630
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:568
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:602
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:538
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:562
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:627
unsigned getVPBlockID() const
Definition: VPlan.h:495
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:474
VPBlocksTy & getSuccessors()
Definition: VPlan.h:523
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:190
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:154
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:591
void setParent(VPRegionBlock *P)
Definition: VPlan.h:508
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:584
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:532
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:522
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:3435
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:3500
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:3444
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:3463
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3491
static void connectBlocks(VPBlockBase *From, VPBlockBase *To)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:3480
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2234
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2266
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2254
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2236
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2242
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2273
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:2583
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:2624
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2598
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2590
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:2585
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2617
void execute(VPTransformState &State) override
Generate the canonical scalar induction phi of the vector loop.
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2612
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:308
unsigned getVPDefID() const
Definition: VPlanValue.h:428
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:2737
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2752
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getStepValue() const
Definition: VPlan.h:2782
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step)
Definition: VPlan.h:2745
Type * getScalarType() const
Definition: VPlan.h:2777
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2760
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2785
VPValue * getStartValue() const
Definition: VPlan.h:2781
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:2672
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:2685
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2679
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate phi for handling IV based on EVL over iterations correctly.
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:2674
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2694
Recipe to expand a SCEV expression.
Definition: VPlan.h:2551
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:2556
const SCEV * getSCEV() const
Definition: VPlan.h:2576
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2561
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:1653
static bool classof(const VPValue *V)
Definition: VPlan.h:1670
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:1655
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1697
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:1686
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:1694
VPValue * getStartValue() const
Definition: VPlan.h:1689
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:1666
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:1703
~VPHeaderPHIRecipe() override=default
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:2982
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:450
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:2986
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3008
~VPIRBasicBlock() override
Definition: VPlan.h:2991
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:2993
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3001
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1180
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1250
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1280
bool hasResult() const
Definition: VPlan.h:1311
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1186
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1196
@ CalculateTripCountMinusVF
Definition: VPlan.h:1194
unsigned getOpcode() const
Definition: VPlan.h:1287
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1262
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1255
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1267
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: VPlan.h:1304
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2024
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2103
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2065
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2036
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2071
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2057
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2078
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2094
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2098
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:3551
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:144
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:184
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:219
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:69
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:166
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:200
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:170
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:203
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:190
static VPLane getFirstLane()
Definition: VPlan.h:168
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:147
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:206
A value that is used outside the VPlan.
Definition: VPlan.h:686
VPLiveOut(PHINode *Phi, VPValue *Op)
Definition: VPlan.h:690
static bool classof(const VPUser *U)
Definition: VPlan.h:693
bool usesScalars(const VPValue *Op) const override
Returns true if the VPLiveOut uses scalars of operand Op.
Definition: VPlan.h:705
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the VPLiveOut to O.
PHINode * getPhi() const
Definition: VPlan.h:711
void fixPhi(VPlan &Plan, VPTransformState &State)
Fixup the wrapped LCSSA phi node in the unique exit block.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2285
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2309
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2289
void execute(VPTransformState &State) override
Generates phi nodes for live-outs as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2293
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:726
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:812
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:751
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:817
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:788
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:737
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:752
static bool classof(const VPUser *U)
Definition: VPlan.h:793
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:742
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:801
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flags for a recipe along with it.
Definition: VPlan.h:915
ExactFlagsTy ExactFlags
Definition: VPlan.h:971
FastMathFlagsTy FMFs
Definition: VPlan.h:974
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPFlagsTy GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1048
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:973
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:968
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1023
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1100
bool isInBounds() const
Definition: VPlan.h:1142
GEPFlagsTy GEPFlags
Definition: VPlan.h:972
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1054
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1035
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1069
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1149
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:993
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:970
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1029
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1041
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:979
WrapFlagsTy WrapFlags
Definition: VPlan.h:969
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1153
bool isDisjoint() const
Definition: VPlan.h:1165
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1136
bool hasNoSignedWrap() const
Definition: VPlan.h:1159
static bool classof(const VPUser *U)
Definition: VPlan.h:1063
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:986
A recipe for handling reduction phis.
Definition: VPlan.h:1908
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:1921
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:1959
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1931
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:1962
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1941
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:1954
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2113
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2150
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2119
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2152
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2148
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2131
void execute(VPTransformState &State) override
Generate the reduction in the loop.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3019
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:703
const VPBlockBase * getEntry() const
Definition: VPlan.h:3058
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3090
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:712
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3075
VPBlockBase * getExiting()
Definition: VPlan.h:3071
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3063
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:771
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3041
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3032
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:719
const VPBlockBase * getExiting() const
Definition: VPlan.h:3070
VPBlockBase * getEntry()
Definition: VPlan.h:3059
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3083
~VPRegionBlock() override
Definition: VPlan.h:3045
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3054
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2161
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2206
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2213
bool isUniform() const
Definition: VPlan.h:2201
bool isPredicated() const
Definition: VPlan.h:2203
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2180
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2170
unsigned getOpcode() const
Definition: VPlan.h:2230
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2225
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1429
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1443
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1459
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1457
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1437
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:2794
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2834
VPValue * getStepValue() const
Definition: VPlan.h:2831
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:2804
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2814
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:2798
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:843
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:849
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:906
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:858
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:909
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:846
static bool classof(const VPUser *U)
Definition: VPlan.h:898
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:854
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:449
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:36
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:203
operand_range operands()
Definition: VPlanValue.h:273
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:258
unsigned getNumOperands() const
Definition: VPlanValue.h:252
operand_iterator op_begin()
Definition: VPlanValue.h:269
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:253
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:247
Value * getUnderlyingValue()
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:119
unsigned getNumUsers() const
Definition: VPlanValue.h:112
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:173
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:168
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:133
bool isDefinedOutsideVectorRegions() const
Returns true if the VPValue is defined outside any vector regions, i.e.
Definition: VPlanValue.h:187
A recipe to compute the pointers for widened memory accesses of IndexedTy for all parts.
Definition: VPlan.h:1597
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, bool IsReverse, bool IsInBounds, DebugLoc DL)
Definition: VPlan.h:1602
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1612
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1618
A recipe for widening Call instructions.
Definition: VPlan.h:1468
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1509
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1492
VPWidenCallRecipe(Value *UV, iterator_range< IterT > CallArguments, Intrinsic::ID VectorIntrinsicID, DebugLoc DL={}, Function *Variant=nullptr)
Definition: VPlan.h:1480
Function * getCalledScalarFunction() const
Definition: VPlan.h:1502
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1506
~VPWidenCallRecipe() override=default
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:2708
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2715
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:2710
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1379
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1387
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1422
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1425
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1397
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1403
A recipe for handling GEP instructions.
Definition: VPlan.h:1555
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1577
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1572
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:1710
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, TruncInst *Trunc)
Definition: VPlan.h:1723
const TruncInst * getTruncInst() const
Definition: VPlan.h:1771
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:1757
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1733
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:1770
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:1765
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc)
Definition: VPlan.h:1716
const VPValue * getStepValue() const
Definition: VPlan.h:1766
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:1784
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:1750
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1776
A common base class for widening memory operations.
Definition: VPlan.h:2318
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2329
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2326
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2365
static bool classof(const VPUser *U)
Definition: VPlan.h:2359
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2385
Instruction & Ingredient
Definition: VPlan.h:2320
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2348
Instruction & getIngredient() const
Definition: VPlan.h:2389
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2323
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2352
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2339
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2379
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2375
void setMask(VPValue *Mask)
Definition: VPlan.h:2331
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2372
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2369
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:1836
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:1866
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:1875
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:1842
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1848
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:1872
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1809
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:1824
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:1797
VPWidenRecipe is a recipe for producing a vector-type copy of its ingredient.
Definition: VPlan.h:1347
void execute(VPTransformState &State) override
Produce widened copies of all Ingredients.
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1358
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1352
unsigned getOpcode() const
Definition: VPlan.h:1369
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:3364
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:3405
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1149
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:3558
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:3648
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:3636
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:3645
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3120
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1040
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1016
void prepareToExecute(Value *TripCount, Value *VectorTripCount, Value *CanonicalIVStartValue, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:821
bool hasScalableVF()
Definition: VPlan.h:3254
VPBasicBlock * getEntry()
Definition: VPlan.h:3215
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3240
void setName(const Twine &newName)
Definition: VPlan.h:3277
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3243
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3219
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3233
void removeLiveOut(PHINode *PN)
Definition: VPlan.h:3334
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3260
void addLiveOut(PHINode *PN, VPValue *V)
Definition: VPlan.cpp:1049
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3216
VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader, trip count TC and Entry to the plan.
Definition: VPlan.h:3180
VPBasicBlock * getPreheader()
Definition: VPlan.h:3353
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.h:3315
const VPRegionBlock * getVectorLoopRegion() const
Definition: VPlan.h:3318
bool hasVF(ElementCount VF)
Definition: VPlan.h:3253
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:3347
bool hasUF(unsigned UF) const
Definition: VPlan.h:3266
void setVF(ElementCount VF)
Definition: VPlan.h:3247
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3226
VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
Construct a VPlan with original preheader Preheader and Entry to the plan.
Definition: VPlan.h:3189
const VPBasicBlock * getPreheader() const
Definition: VPlan.h:3354
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3281
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1046
bool hasScalarVFOnly() const
Definition: VPlan.h:3264
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:876
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:3323
const MapVector< PHINode *, VPLiveOut * > & getLiveOuts() const
Definition: VPlan.h:3339
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:990
void addVF(ElementCount VF)
Definition: VPlan.h:3245
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3298
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:3343
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:960
void setUF(unsigned UF)
Definition: VPlan.h:3268
static VPlanPtr createInitialVPlan(const SCEV *TripCount, ScalarEvolution &PSE, BasicBlock *PH)
Create initial VPlan skeleton, having an "entry" VPBasicBlock (wrapping original scalar pre-header PH...
Definition: VPlan.cpp:806
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1092
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, ScalarEvolution &SE)
Get or create a VPValue that corresponds to the expansion of Expr.
Definition: VPlan.cpp:1477
bool isUniformAfterVectorization(VPValue *VPV)
Returns true if VPV is uniform after vectorization.
Definition: VPlan.h:3668
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition: VPlan.cpp:1472
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition: VPlan.cpp:1467
bool isHeaderMask(VPValue *V, VPlan &Plan)
Return true if V is a header mask in Plan.
Definition: VPlan.cpp:1494
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
const SCEV * createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *OrigLoop)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:135
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator...
Definition: STLExtras.h:572
@ Other
Any other memory.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:471
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:88
iterator end()
Definition: VPlan.h:129
const ElementCount Start
Definition: VPlan.h:90
ElementCount End
Definition: VPlan.h:93
iterator begin()
Definition: VPlan.h:128
bool isEmpty() const
Definition: VPlan.h:95
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:99
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:1881
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1891
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:1882
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:1887
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPIteration represents a single point in the iteration space of the output (vectorized and/or unrolle...
Definition: VPlan.h:226
VPIteration(unsigned Part, const VPLane &Lane)
Definition: VPlan.h:236
unsigned Part
in [0..UF)
Definition: VPlan.h:228
VPLane Lane
Definition: VPlan.h:230
VPIteration(unsigned Part, unsigned Lane, VPLane::Kind Kind=VPLane::Kind::First)
Definition: VPlan.h:232
bool isFirstIteration() const
Definition: VPlan.h:238
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:932
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:366
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:372
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:380
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:368
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:376
CFGState(DominatorTree *DT)
Definition: VPlan.h:385
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:354
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:383
SmallVector< Value *, 2 > PerPartValuesTy
A type for vectorized values in the new loop.
Definition: VPlan.h:261
DenseMap< VPValue *, ScalarsPerPartValuesTy > PerPartScalars
Definition: VPlan.h:266
DenseMap< VPValue *, PerPartValuesTy > PerPartOutput
Definition: VPlan.h:263
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:243
Value * get(VPValue *Def, unsigned Part, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def and a given Part if IsScalar is false,...
Definition: VPlan.cpp:253
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:394
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:417
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:420
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:367
void reset(VPValue *Def, Value *V, unsigned Part)
Reset an existing vector value for Def and a given Part.
Definition: VPlan.h:310
struct llvm::VPTransformState::CFGState CFG
void reset(VPValue *Def, Value *V, const VPIteration &Instance)
Reset an existing scalar value for Def and a given Instance.
Definition: VPlan.h:332
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:413
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:359
void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:399
void set(VPValue *Def, Value *V, const VPIteration &Instance)
Set the generated scalar V for Def and the given Instance.
Definition: VPlan.h:318
void set(VPValue *Def, Value *V, unsigned Part, bool IsScalar=false)
Set the generated vector Value for a given VPValue and a given Part, if IsScalar is false.
Definition: VPlan.h:295
std::optional< VPIteration > Instance
Hold the indices to generate specific scalar instructions.
Definition: VPlan.h:255
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:397
bool hasScalarValue(VPValue *Def, VPIteration Instance)
Definition: VPlan.h:283
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:403
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:400
bool hasVectorValue(VPValue *Def, unsigned Part)
Definition: VPlan.h:277
ElementCount VF
The chosen Vectorization and Unroll Factors of the loop being vectorized.
Definition: VPlan.h:249
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:406
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:378
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:2433
void execute(VPTransformState &State) override
Generate the wide load or gather.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2445
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenLoadEVLRecipe(VPWidenLoadRecipe *L, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2434
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2457
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2394
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2395
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2421
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2403
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1521
bool isInvariantCond() const
Definition: VPlan.h:1549
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1529
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1523
VPValue * getCond() const
Definition: VPlan.h:1545
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:2509
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2521
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenStoreEVLRecipe(VPWidenStoreRecipe *S, VPValue *EVL, VPValue *Mask)
Definition: VPlan.h:2510
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2536
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:2524
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:2468
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2497
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2469
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:2485
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2476
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:3414
const Value * V
Definition: VPlan.h:3412
void print(raw_ostream &O) const
Definition: VPlan.cpp:1267