LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/Twine.h"
35#include "llvm/ADT/ilist.h"
36#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
/// Reciprocal of the execution probability assumed for predicated blocks. A
/// result of X means a predicated block is modelled as running once for every
/// X iterations of the loop header.
///
/// TODO: Use the actual block probability here when it is available. For now
/// every predicated block is assumed to have a 50% chance of executing.
inline unsigned getReciprocalPredBlockProb() {
  // One execution per two header iterations, i.e. a 50% probability.
  constexpr unsigned ReciprocalProb = 2;
  return ReciprocalProb;
}
93
94/// A range of powers-of-2 vectorization factors with fixed start and
95/// adjustable end. The range includes start and excludes end, e.g.,:
96/// [1, 16) = {1, 2, 4, 8}
97struct VFRange {
98 // A power of 2.
100
101 // A power of 2. If End <= Start range is empty.
103
104 bool isEmpty() const {
106 }
107
109 : Start(Start), End(End) {
111 "Both Start and End should have the same scalable flag");
113 "Expected Start to be a power of 2");
115 "Expected End to be a power of 2");
116 }
117
118 /// Iterator to iterate over vectorization factors in a VFRange.
120 : public iterator_facade_base<iterator, std::forward_iterator_tag,
121 ElementCount> {
122 ElementCount VF;
123
124 public:
125 iterator(ElementCount VF) : VF(VF) {}
126
127 bool operator==(const iterator &Other) const { return VF == Other.VF; }
128
129 ElementCount operator*() const { return VF; }
130
132 VF *= 2;
133 return *this;
134 }
135 };
136
140 return iterator(End);
141 }
142};
143
144using VPlanPtr = std::unique_ptr<VPlan>;
145
146/// In what follows, the term "input IR" refers to code that is fed into the
147/// vectorizer whereas the term "output IR" refers to code that is generated by
148/// the vectorizer.
149
150/// VPLane provides a way to access lanes in both fixed width and scalable
151/// vectors, where for the latter the lane index sometimes needs calculating
152/// as a runtime expression.
153class VPLane {
154public:
155 /// Kind describes how to interpret Lane.
156 enum class Kind : uint8_t {
157 /// For First, Lane is the index into the first N elements of a
158 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
159 First,
160 /// For ScalableLast, Lane is the offset from the start of the last
161 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
162 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
163 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
165 };
166
167private:
168 /// in [0..VF)
169 unsigned Lane;
170
171 /// Indicates how the Lane should be interpreted, as described above.
172 Kind LaneKind;
173
174public:
175 VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
176 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
177
179
180 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
181 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
182 "trying to extract with invalid offset");
183 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
184 Kind LaneKind;
185 if (VF.isScalable())
186 // In this case 'LaneOffset' refers to the offset from the start of the
187 // last subvector with VF.getKnownMinValue() elements.
189 else
190 LaneKind = VPLane::Kind::First;
191 return VPLane(LaneOffset, LaneKind);
192 }
193
195 return getLaneFromEnd(VF, 1);
196 }
197
198 /// Returns a compile-time known value for the lane index and asserts if the
199 /// lane can only be calculated at runtime.
200 unsigned getKnownLane() const {
201 assert(LaneKind == Kind::First);
202 return Lane;
203 }
204
205 /// Returns an expression describing the lane index that can be used at
206 /// runtime.
207 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
208
209 /// Returns the Kind of lane offset.
210 Kind getKind() const { return LaneKind; }
211
212 /// Returns true if this is the first lane of the whole vector.
213 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
214
215 /// Maps the lane to a cache index based on \p VF.
216 unsigned mapToCacheIndex(const ElementCount &VF) const {
217 switch (LaneKind) {
219 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
220 return VF.getKnownMinValue() + Lane;
221 default:
222 assert(Lane < VF.getKnownMinValue());
223 return Lane;
224 }
225 }
226
227 /// Returns the maxmimum number of lanes that we are able to consider
228 /// caching for \p VF.
229 static unsigned getNumCachedLanes(const ElementCount &VF) {
230 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
231 }
232};
233
234/// VPTransformState holds information passed down when "executing" a VPlan,
235/// needed for generating the output IR.
240 Loop *CurrentParentLoop, Type *CanonicalIVTy);
241 /// Target Transform Info.
243
244 /// The chosen Vectorization Factor of the loop being vectorized.
246
247 /// Hold the index to generate specific scalar instructions. Null indicates
248 /// that all instances are to be generated, using either scalar or vector
249 /// instructions.
250 std::optional<VPLane> Lane;
251
252 struct DataState {
253 // Each value from the original loop, when vectorized, is represented by a
254 // vector value in the map.
256
259
260 /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
261 /// is false, otherwise return the generated scalar. \See set.
262 Value *get(VPValue *Def, bool IsScalar = false);
263
264 /// Get the generated Value for a given VPValue \p Def and a given \p Lane.
265 Value *get(VPValue *Def, const VPLane &Lane);
266
267 bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
268
270 auto I = Data.VPV2Scalars.find(Def);
271 if (I == Data.VPV2Scalars.end())
272 return false;
273 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
274 return CacheIdx < I->second.size() && I->second[CacheIdx];
275 }
276
277 /// Set the generated vector Value for a given VPValue, if \p
278 /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
279 void set(VPValue *Def, Value *V, bool IsScalar = false) {
280 if (IsScalar) {
281 set(Def, V, VPLane(0));
282 return;
283 }
284 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
285 "scalar values must be stored as (0, 0)");
286 Data.VPV2Vector[Def] = V;
287 }
288
/// Reset an existing vector value for \p Def to \p V. A vector value for
/// \p Def must already have been recorded; this is checked by an assertion.
void reset(VPValue *Def, Value *V) {
  assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
  Data.VPV2Vector[Def] = V;
}
294
295 /// Set the generated scalar \p V for \p Def and the given \p Lane.
296 void set(VPValue *Def, Value *V, const VPLane &Lane) {
297 auto &Scalars = Data.VPV2Scalars[Def];
298 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
299 if (Scalars.size() <= CacheIdx)
300 Scalars.resize(CacheIdx + 1);
301 assert(!Scalars[CacheIdx] && "should overwrite existing value");
302 Scalars[CacheIdx] = V;
303 }
304
305 /// Reset an existing scalar value for \p Def and a given \p Lane.
306 void reset(VPValue *Def, Value *V, const VPLane &Lane) {
307 auto Iter = Data.VPV2Scalars.find(Def);
308 assert(Iter != Data.VPV2Scalars.end() &&
309 "need to overwrite existing value");
310 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
311 assert(CacheIdx < Iter->second.size() &&
312 "need to overwrite existing value");
313 Iter->second[CacheIdx] = V;
314 }
315
316 /// Add additional metadata to \p To that was not present on \p Orig.
317 ///
318 /// Currently this is used to add the noalias annotations based on the
319 /// inserted memchecks. Use this for instructions that are *cloned* into the
320 /// vector loop.
321 void addNewMetadata(Instruction *To, const Instruction *Orig);
322
323 /// Add metadata from one instruction to another.
324 ///
325 /// This includes both the original MDs from \p From and additional ones (\see
326 /// addNewMetadata). Use this for *newly created* instructions in the vector
327 /// loop.
328 void addMetadata(Value *To, Instruction *From);
329
330 /// Set the debug location in the builder using the debug location \p DL.
332
333 /// Construct the vector value of a scalarized value \p V one lane at a time.
334 void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
335
336 /// Hold state information used when constructing the CFG of the output IR,
337 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
338 struct CFGState {
339 /// The previous VPBasicBlock visited. Initially set to null.
341
342 /// The previous IR BasicBlock created or used. Initially set to the new
343 /// header BasicBlock.
344 BasicBlock *PrevBB = nullptr;
345
346 /// The last IR BasicBlock in the output IR. Set to the exit block of the
347 /// vector loop.
348 BasicBlock *ExitBB = nullptr;
349
350 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
351 /// of replication, maps the BasicBlock of the last replica created.
353
354 /// Updater for the DominatorTree.
356
358 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
359
360 /// Returns the BasicBlock* mapped to the pre-header of the loop region
361 /// containing \p R.
364
365 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
367
368 /// Hold a reference to the IRBuilder used to generate output IR code.
370
371 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
373
374 /// Pointer to the VPlan code is generated for.
376
377 /// The parent loop object for the current scope, or nullptr.
379
380 /// LoopVersioning. It's only set up (non-null) if memchecks were
381 /// used.
382 ///
383 /// This is currently only used to add no-alias metadata based on the
384 /// memchecks. The actual versioning is performed manually.
386
387 /// Map SCEVs to their expanded values. Populated when executing
388 /// VPExpandSCEVRecipes.
390
391 /// VPlan-based type analysis.
393};
394
395/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
396/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
398 friend class VPBlockUtils;
399
400 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
401
402 /// An optional name for the block.
403 std::string Name;
404
405 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
406 /// it is a topmost VPBlockBase.
407 VPRegionBlock *Parent = nullptr;
408
409 /// List of predecessor blocks.
411
412 /// List of successor blocks.
414
415 /// VPlan containing the block. Can only be set on the entry block of the
416 /// plan.
417 VPlan *Plan = nullptr;
418
419 /// Add \p Successor as the last successor to this block.
420 void appendSuccessor(VPBlockBase *Successor) {
421 assert(Successor && "Cannot add nullptr successor!");
422 Successors.push_back(Successor);
423 }
424
425 /// Add \p Predecessor as the last predecessor to this block.
426 void appendPredecessor(VPBlockBase *Predecessor) {
427 assert(Predecessor && "Cannot add nullptr predecessor!");
428 Predecessors.push_back(Predecessor);
429 }
430
431 /// Remove \p Predecessor from the predecessors of this block.
432 void removePredecessor(VPBlockBase *Predecessor) {
433 auto Pos = find(Predecessors, Predecessor);
434 assert(Pos && "Predecessor does not exist");
435 Predecessors.erase(Pos);
436 }
437
438 /// Remove \p Successor from the successors of this block.
439 void removeSuccessor(VPBlockBase *Successor) {
440 auto Pos = find(Successors, Successor);
441 assert(Pos && "Successor does not exist");
442 Successors.erase(Pos);
443 }
444
445 /// This function replaces one predecessor with another, useful when
446 /// trying to replace an old block in the CFG with a new one.
447 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
448 auto I = find(Predecessors, Old);
449 assert(I != Predecessors.end());
450 assert(Old->getParent() == New->getParent() &&
451 "replaced predecessor must have the same parent");
452 *I = New;
453 }
454
455 /// This function replaces one successor with another, useful when
456 /// trying to replace an old block in the CFG with a new one.
457 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
458 auto I = find(Successors, Old);
459 assert(I != Successors.end());
460 assert(Old->getParent() == New->getParent() &&
461 "replaced successor must have the same parent");
462 *I = New;
463 }
464
465protected:
466 VPBlockBase(const unsigned char SC, const std::string &N)
467 : SubclassID(SC), Name(N) {}
468
469public:
470 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
471 /// that are actually instantiated. Values of this enumeration are kept in the
472 /// SubclassID field of the VPBlockBase objects. They are used for concrete
473 /// type identification.
474 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
475
477
478 virtual ~VPBlockBase() = default;
479
480 const std::string &getName() const { return Name; }
481
482 void setName(const Twine &newName) { Name = newName.str(); }
483
484 /// \return an ID for the concrete type of this object.
485 /// This is used to implement the classof checks. This should not be used
486 /// for any other purpose, as the values may change as LLVM evolves.
487 unsigned getVPBlockID() const { return SubclassID; }
488
489 VPRegionBlock *getParent() { return Parent; }
490 const VPRegionBlock *getParent() const { return Parent; }
491
492 /// \return A pointer to the plan containing the current block.
493 VPlan *getPlan();
494 const VPlan *getPlan() const;
495
496 /// Sets the pointer of the plan containing the block. The block must be the
497 /// entry block into the VPlan.
498 void setPlan(VPlan *ParentPlan);
499
500 void setParent(VPRegionBlock *P) { Parent = P; }
501
502 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
503 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
504 /// VPBlockBase is a VPBasicBlock, it is returned.
505 const VPBasicBlock *getEntryBasicBlock() const;
507
508 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
509 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
510 /// VPBlockBase is a VPBasicBlock, it is returned.
511 const VPBasicBlock *getExitingBasicBlock() const;
513
514 const VPBlocksTy &getSuccessors() const { return Successors; }
515 VPBlocksTy &getSuccessors() { return Successors; }
516
519
520 const VPBlocksTy &getPredecessors() const { return Predecessors; }
521 VPBlocksTy &getPredecessors() { return Predecessors; }
522
523 /// \return the successor of this VPBlockBase if it has a single successor.
524 /// Otherwise return a null pointer.
526 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
527 }
528
529 /// \return the predecessor of this VPBlockBase if it has a single
530 /// predecessor. Otherwise return a null pointer.
532 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
533 }
534
535 size_t getNumSuccessors() const { return Successors.size(); }
536 size_t getNumPredecessors() const { return Predecessors.size(); }
537
538 /// An Enclosing Block of a block B is any block containing B, including B
539 /// itself. \return the closest enclosing block starting from "this", which
540 /// has successors. \return the root enclosing block if all enclosing blocks
541 /// have no successors.
543
544 /// \return the closest enclosing block starting from "this", which has
545 /// predecessors. \return the root enclosing block if all enclosing blocks
546 /// have no predecessors.
548
549 /// \return the successors either attached directly to this VPBlockBase or, if
550 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
551 /// successors of its own, search recursively for the first enclosing
552 /// VPRegionBlock that has successors and return them. If no such
553 /// VPRegionBlock exists, return the (empty) successors of the topmost
554 /// VPBlockBase reached.
557 }
558
559 /// \return the hierarchical successor of this VPBlockBase if it has a single
560 /// hierarchical successor. Otherwise return a null pointer.
563 }
564
565 /// \return the predecessors either attached directly to this VPBlockBase or,
566 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
567 /// predecessors of its own, search recursively for the first enclosing
568 /// VPRegionBlock that has predecessors and return them. If no such
569 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
570 /// VPBlockBase reached.
573 }
574
575 /// \return the hierarchical predecessor of this VPBlockBase if it has a
576 /// single hierarchical predecessor. Otherwise return a null pointer.
579 }
580
581 /// Set a given VPBlockBase \p Successor as the single successor of this
582 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
583 /// This VPBlockBase must have no successors.
585 assert(Successors.empty() && "Setting one successor when others exist.");
586 assert(Successor->getParent() == getParent() &&
587 "connected blocks must have the same parent");
588 appendSuccessor(Successor);
589 }
590
591 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
592 /// successors of this VPBlockBase. This VPBlockBase is not added as
593 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
594 /// successors.
595 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
596 assert(Successors.empty() && "Setting two successors when others exist.");
597 appendSuccessor(IfTrue);
598 appendSuccessor(IfFalse);
599 }
600
601 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
602 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
603 /// as successor of any VPBasicBlock in \p NewPreds.
605 assert(Predecessors.empty() && "Block predecessors already set.");
606 for (auto *Pred : NewPreds)
607 appendPredecessor(Pred);
608 }
609
610 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
611 /// This VPBlockBase must have no successors. This VPBlockBase is not added
612 /// as predecessor of any VPBasicBlock in \p NewSuccs.
614 assert(Successors.empty() && "Block successors already set.");
615 for (auto *Succ : NewSuccs)
616 appendSuccessor(Succ);
617 }
618
619 /// Remove all the predecessors of this block.
620 void clearPredecessors() { Predecessors.clear(); }
621
622 /// Remove all the successors of this block.
623 void clearSuccessors() { Successors.clear(); }
624
625 /// Swap successors of the block. The block must have exactly 2 successors.
626 // TODO: This should be part of introducing conditional branch recipes rather
627 // than being independent.
629 assert(Successors.size() == 2 && "must have 2 successors to swap");
630 std::swap(Successors[0], Successors[1]);
631 }
632
633 /// The method which generates the output IR that correspond to this
634 /// VPBlockBase, thereby "executing" the VPlan.
635 virtual void execute(VPTransformState *State) = 0;
636
637 /// Return the cost of the block.
639
640 /// Return true if it is legal to hoist instructions into this block.
642 // There are currently no constraints that prevent an instruction to be
643 // hoisted into a VPBlockBase.
644 return true;
645 }
646
647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
648 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
649 OS << getName();
650 }
651
652 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
653 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
654 /// consecutive numbers.
655 ///
656 /// Note that the numbering is applied to the whole VPlan, so printing
657 /// individual blocks is consistent with the whole VPlan printing.
658 virtual void print(raw_ostream &O, const Twine &Indent,
659 VPSlotTracker &SlotTracker) const = 0;
660
661 /// Print plain-text dump of this VPBlockBase to \p O.
662 void print(raw_ostream &O) const {
664 print(O, "", SlotTracker);
665 }
666
667 /// Print the successors of this block to \p O, prefixing all lines with \p
668 /// Indent.
669 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
670
671 /// Dump this VPBlockBase to dbgs().
672 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
673#endif
674
675 /// Clone the current block and its recipes without updating the operands of
676 /// the cloned recipes, including all blocks in the single-entry single-exit
677 /// region for VPRegionBlocks.
678 virtual VPBlockBase *clone() = 0;
679};
680
681/// Struct to hold various analysis needed for cost computations.
689
692 : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
693 CM(CM) {}
694
695 /// Return the cost for \p UI with \p VF using the legacy cost model as
696 /// fallback until computing the cost of all recipes migrates to VPlan.
698
699 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
700 /// has already been pre-computed.
701 bool skipCostComputation(Instruction *UI, bool IsVector) const;
702
703 /// Returns the OperandInfo for \p V, if it is a live-in.
705};
706
707/// VPRecipeBase is a base class modeling a sequence of one or more output IR
708/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
709/// and is responsible for deleting its defined values. Single-value
710/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
711/// VPRecipeBase and VPValue separately.
712class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
713 public VPDef,
714 public VPUser {
715 friend VPBasicBlock;
716 friend class VPBlockUtils;
717
718 /// Each VPRecipe belongs to a single VPBasicBlock.
719 VPBasicBlock *Parent = nullptr;
720
721 /// The debug location for the recipe.
722 DebugLoc DL;
723
724public:
726 DebugLoc DL = {})
727 : VPDef(SC), VPUser(Operands), DL(DL) {}
728
729 template <typename IterT>
731 DebugLoc DL = {})
732 : VPDef(SC), VPUser(Operands), DL(DL) {}
733 virtual ~VPRecipeBase() = default;
734
735 /// Clone the current recipe.
736 virtual VPRecipeBase *clone() = 0;
737
738 /// \return the VPBasicBlock which this VPRecipe belongs to.
739 VPBasicBlock *getParent() { return Parent; }
740 const VPBasicBlock *getParent() const { return Parent; }
741
742 /// The method which generates the output IR instructions that correspond to
743 /// this VPRecipe, thereby "executing" the VPlan.
744 virtual void execute(VPTransformState &State) = 0;
745
746 /// Return the cost of this recipe, taking into account if the cost
747 /// computation should be skipped and the ForceTargetInstructionCost flag.
748 /// Also takes care of printing the cost for debugging.
750
751 /// Insert an unlinked recipe into a basic block immediately before
752 /// the specified recipe.
753 void insertBefore(VPRecipeBase *InsertPos);
754 /// Insert an unlinked recipe into \p BB immediately before the insertion
755 /// point \p IP;
757
758 /// Insert an unlinked Recipe into a basic block immediately after
759 /// the specified Recipe.
760 void insertAfter(VPRecipeBase *InsertPos);
761
762 /// Unlink this recipe from its current VPBasicBlock and insert it into
763 /// the VPBasicBlock that MovePos lives in, right after MovePos.
764 void moveAfter(VPRecipeBase *MovePos);
765
766 /// Unlink this recipe and insert into BB before I.
767 ///
768 /// \pre I is a valid iterator into BB.
770
771 /// This method unlinks 'this' from the containing basic block, but does not
772 /// delete it.
773 void removeFromParent();
774
775 /// This method unlinks 'this' from the containing basic block and deletes it.
776 ///
777 /// \returns an iterator pointing to the element after the erased one
779
780 /// Method to support type inquiry through isa, cast, and dyn_cast.
781 static inline bool classof(const VPDef *D) {
782 // All VPDefs are also VPRecipeBases.
783 return true;
784 }
785
786 static inline bool classof(const VPUser *U) { return true; }
787
788 /// Returns true if the recipe may have side-effects.
789 bool mayHaveSideEffects() const;
790
791 /// Returns true for PHI-like recipes.
792 bool isPhi() const {
793 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
794 }
795
796 /// Returns true if the recipe may read from memory.
797 bool mayReadFromMemory() const;
798
799 /// Returns true if the recipe may write to memory.
800 bool mayWriteToMemory() const;
801
802 /// Returns true if the recipe may read from or write to memory.
803 bool mayReadOrWriteMemory() const {
805 }
806
807 /// Returns the debug location of the recipe.
808 DebugLoc getDebugLoc() const { return DL; }
809
810protected:
811 /// Compute the cost of this recipe either using a recipe's specialized
812 /// implementation or using the legacy cost model and the underlying
813 /// instructions.
815 VPCostContext &Ctx) const;
816};
817
// Helper macro to define common classof implementations for recipes. It
// expands to classof overloads taking a VPDef, VPValue, VPUser, VPRecipeBase
// or VPSingleDefRecipe pointer; each returns true only when the recipe behind
// the argument has the ID \p VPDefID. The VPValue overload returns false when
// the value has no defining recipe, and the VPUser overload returns false
// when the user is not a VPRecipeBase.
#define VP_CLASSOF_IMPL(VPDefID)                                               \
  static inline bool classof(const VPDef *D) {                                 \
    return D->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    auto *R = V->getDefiningRecipe();                                          \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    auto *R = dyn_cast<VPRecipeBase>(U);                                       \
    return R && R->getVPDefID() == VPDefID;                                    \
  }                                                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPDefID() == VPDefID;                                         \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPDefID() == VPDefID;                                         \
  }
837
838/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
839/// more output IR instructions that define a single result VPValue.
840/// Note that VPRecipeBase must be inherited from before VPValue.
841class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
842public:
843 template <typename IterT>
844 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
845 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
846
847 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
848 DebugLoc DL = {})
849 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
850
851 template <typename IterT>
852 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
853 DebugLoc DL = {})
854 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
855
/// Method to support type inquiry through isa, cast, and dyn_cast. Returns
/// true for the recipe IDs in the first group of cases below and false for
/// the IDs in the second group; any ID not listed in either group reaches
/// llvm_unreachable.
static inline bool classof(const VPRecipeBase *R) {
  switch (R->getVPDefID()) {
  // Recipe IDs that are accepted as VPSingleDefRecipes.
  case VPRecipeBase::VPDerivedIVSC:
  case VPRecipeBase::VPEVLBasedIVPHISC:
  case VPRecipeBase::VPExpandSCEVSC:
  case VPRecipeBase::VPInstructionSC:
  case VPRecipeBase::VPReductionEVLSC:
  case VPRecipeBase::VPReductionSC:
  case VPRecipeBase::VPReplicateSC:
  case VPRecipeBase::VPScalarIVStepsSC:
  case VPRecipeBase::VPVectorPointerSC:
  case VPRecipeBase::VPReverseVectorPointerSC:
  case VPRecipeBase::VPWidenCallSC:
  case VPRecipeBase::VPWidenCanonicalIVSC:
  case VPRecipeBase::VPWidenCastSC:
  case VPRecipeBase::VPWidenGEPSC:
  case VPRecipeBase::VPWidenIntrinsicSC:
  case VPRecipeBase::VPWidenSC:
  case VPRecipeBase::VPWidenEVLSC:
  case VPRecipeBase::VPWidenSelectSC:
  case VPRecipeBase::VPBlendSC:
  case VPRecipeBase::VPPredInstPHISC:
  case VPRecipeBase::VPCanonicalIVPHISC:
  case VPRecipeBase::VPActiveLaneMaskPHISC:
  case VPRecipeBase::VPFirstOrderRecurrencePHISC:
  case VPRecipeBase::VPWidenPHISC:
  case VPRecipeBase::VPWidenIntOrFpInductionSC:
  case VPRecipeBase::VPWidenPointerInductionSC:
  case VPRecipeBase::VPReductionPHISC:
  case VPRecipeBase::VPScalarCastSC:
  case VPRecipeBase::VPPartialReductionSC:
    return true;
  // Recipe IDs that are rejected, i.e. not treated as VPSingleDefRecipes.
  case VPRecipeBase::VPBranchOnMaskSC:
  case VPRecipeBase::VPInterleaveSC:
  case VPRecipeBase::VPIRInstructionSC:
  case VPRecipeBase::VPWidenLoadEVLSC:
  case VPRecipeBase::VPWidenLoadSC:
  case VPRecipeBase::VPWidenStoreEVLSC:
  case VPRecipeBase::VPWidenStoreSC:
  case VPRecipeBase::VPHistogramSC:
    // TODO: Widened stores don't define a value, but widened loads do. Split
    // the recipes to be able to make widened loads VPSingleDefRecipes.
    return false;
  }
  llvm_unreachable("Unhandled VPDefID");
}
902
903 static inline bool classof(const VPUser *U) {
904 auto *R = dyn_cast<VPRecipeBase>(U);
905 return R && classof(R);
906 }
907
908 virtual VPSingleDefRecipe *clone() override = 0;
909
910 /// Returns the underlying instruction.
912 return cast<Instruction>(getUnderlyingValue());
913 }
915 return cast<Instruction>(getUnderlyingValue());
916 }
917
918#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
919 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
920 LLVM_DUMP_METHOD void dump() const;
921#endif
922};
923
924/// Class to record the LLVM IR flags of a recipe alongside the recipe itself.
926 enum class OperationType : unsigned char {
927 Cmp,
928 OverflowingBinOp,
929 DisjointOp,
930 PossiblyExactOp,
931 GEPOp,
932 FPMathOp,
933 NonNegOp,
934 Other
935 };
936
937public:
938 struct WrapFlagsTy {
939 char HasNUW : 1;
940 char HasNSW : 1;
941
943 };
944
946 char IsDisjoint : 1;
948 };
949
950private:
951 struct ExactFlagsTy {
952 char IsExact : 1;
953 };
954 struct NonNegFlagsTy {
955 char NonNeg : 1;
956 };
957 struct FastMathFlagsTy {
958 char AllowReassoc : 1;
959 char NoNaNs : 1;
960 char NoInfs : 1;
961 char NoSignedZeros : 1;
962 char AllowReciprocal : 1;
963 char AllowContract : 1;
964 char ApproxFunc : 1;
965
966 FastMathFlagsTy(const FastMathFlags &FMF);
967 };
968
969 OperationType OpType;
970
971 union {
975 ExactFlagsTy ExactFlags;
977 NonNegFlagsTy NonNegFlags;
978 FastMathFlagsTy FMFs;
979 unsigned AllFlags;
980 };
981
982protected:
984 OpType = Other.OpType;
985 AllFlags = Other.AllFlags;
986 }
987
988public:
989 template <typename IterT>
990 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
991 : VPSingleDefRecipe(SC, Operands, DL) {
992 OpType = OperationType::Other;
993 AllFlags = 0;
994 }
995
996 template <typename IterT>
997 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
999 if (auto *Op = dyn_cast<CmpInst>(&I)) {
1000 OpType = OperationType::Cmp;
1001 CmpPredicate = Op->getPredicate();
1002 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1003 OpType = OperationType::DisjointOp;
1004 DisjointFlags.IsDisjoint = Op->isDisjoint();
1005 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1006 OpType = OperationType::OverflowingBinOp;
1007 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1008 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1009 OpType = OperationType::PossiblyExactOp;
1010 ExactFlags.IsExact = Op->isExact();
1011 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1012 OpType = OperationType::GEPOp;
1013 GEPFlags = GEP->getNoWrapFlags();
1014 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1015 OpType = OperationType::NonNegOp;
1016 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1017 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1018 OpType = OperationType::FPMathOp;
1019 FMFs = Op->getFastMathFlags();
1020 } else {
1021 OpType = OperationType::Other;
1022 AllFlags = 0;
1023 }
1024 }
1025
1026 template <typename IterT>
1027 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1028 CmpInst::Predicate Pred, DebugLoc DL = {})
1029 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1030 CmpPredicate(Pred) {}
1031
1032 template <typename IterT>
1033 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1035 : VPSingleDefRecipe(SC, Operands, DL),
1036 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1037
1038 template <typename IterT>
1039 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1040 FastMathFlags FMFs, DebugLoc DL = {})
1041 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1042 FMFs(FMFs) {}
1043
1044 template <typename IterT>
1045 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1047 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1049
1050protected:
1051 template <typename IterT>
1052 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1054 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1055 GEPFlags(GEPFlags) {}
1056
1057public:
1058 static inline bool classof(const VPRecipeBase *R) {
1059 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1060 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1061 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
1062 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
1063 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
1064 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1065 R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
1066 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1067 }
1068
1069 static inline bool classof(const VPUser *U) {
1070 auto *R = dyn_cast<VPRecipeBase>(U);
1071 return R && classof(R);
1072 }
1073
1074 /// Drop all poison-generating flags.
1076 // NOTE: This needs to be kept in-sync with
1077 // Instruction::dropPoisonGeneratingFlags.
1078 switch (OpType) {
1079 case OperationType::OverflowingBinOp:
1080 WrapFlags.HasNUW = false;
1081 WrapFlags.HasNSW = false;
1082 break;
1083 case OperationType::DisjointOp:
1084 DisjointFlags.IsDisjoint = false;
1085 break;
1086 case OperationType::PossiblyExactOp:
1087 ExactFlags.IsExact = false;
1088 break;
1089 case OperationType::GEPOp:
1091 break;
1092 case OperationType::FPMathOp:
1093 FMFs.NoNaNs = false;
1094 FMFs.NoInfs = false;
1095 break;
1096 case OperationType::NonNegOp:
1097 NonNegFlags.NonNeg = false;
1098 break;
1099 case OperationType::Cmp:
1100 case OperationType::Other:
1101 break;
1102 }
1103 }
1104
1105 /// Set the IR flags for \p I.
1106 void setFlags(Instruction *I) const {
1107 switch (OpType) {
1108 case OperationType::OverflowingBinOp:
1109 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1110 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1111 break;
1112 case OperationType::DisjointOp:
1113 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1114 break;
1115 case OperationType::PossiblyExactOp:
1116 I->setIsExact(ExactFlags.IsExact);
1117 break;
1118 case OperationType::GEPOp:
1119 cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
1120 break;
1121 case OperationType::FPMathOp:
1122 I->setHasAllowReassoc(FMFs.AllowReassoc);
1123 I->setHasNoNaNs(FMFs.NoNaNs);
1124 I->setHasNoInfs(FMFs.NoInfs);
1125 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1126 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1127 I->setHasAllowContract(FMFs.AllowContract);
1128 I->setHasApproxFunc(FMFs.ApproxFunc);
1129 break;
1130 case OperationType::NonNegOp:
1131 I->setNonNeg(NonNegFlags.NonNeg);
1132 break;
1133 case OperationType::Cmp:
1134 case OperationType::Other:
1135 break;
1136 }
1137 }
1138
1140 assert(OpType == OperationType::Cmp &&
1141 "recipe doesn't have a compare predicate");
1142 return CmpPredicate;
1143 }
1144
1146
1147 /// Returns true if the recipe has fast-math flags.
1148 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1149
1151
1152 bool hasNoUnsignedWrap() const {
1153 assert(OpType == OperationType::OverflowingBinOp &&
1154 "recipe doesn't have a NUW flag");
1155 return WrapFlags.HasNUW;
1156 }
1157
1158 bool hasNoSignedWrap() const {
1159 assert(OpType == OperationType::OverflowingBinOp &&
1160 "recipe doesn't have a NSW flag");
1161 return WrapFlags.HasNSW;
1162 }
1163
1164 bool isDisjoint() const {
1165 assert(OpType == OperationType::DisjointOp &&
1166 "recipe cannot have a disjoing flag");
1168 }
1169
1170#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1171 void printFlags(raw_ostream &O) const;
1172#endif
1173};
1174
1175/// Helper to access the operand that contains the unroll part for this recipe
1176/// after unrolling.
1177template <unsigned PartOpIdx> class VPUnrollPartAccessor {
1178protected:
1179 /// Return the VPValue operand containing the unroll part or null if there is
1180 /// no such operand.
1182
1183 /// Return the unroll part.
1184 unsigned getUnrollPart(VPUser &U) const;
1185};
1186
1187/// This is a concrete Recipe that models a single VPlan-level instruction.
1188/// While as any Recipe it may generate a sequence of IR instructions when
1189/// executed, these instructions would always form a single-def expression as
1190/// the VPInstruction is also a single def-use vertex.
1192 public VPUnrollPartAccessor<1> {
1193 friend class VPlanSlp;
1194
1195public:
1196 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1197 enum {
1199 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1200 // values of a first-order recurrence.
1206 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1207 /// The first operand is the incoming value from the predecessor in VPlan,
1208 /// the second operand is the incoming value for all other predecessors
1209 /// (which are currently not modeled in VPlan).
1212 // Increment the canonical IV separately for each unrolled part.
1217 // Takes the VPValue to extract from as first operand and the lane or part
1218 // to extract as second operand, counting from the end starting with 1 for
1219 // last. The second operand must be a positive constant and <= VF.
1221 LogicalAnd, // Non-poison propagating logical And.
1222 // Add an offset in bytes (second operand) to a base pointer (first
1223 // operand). Only generates scalar values (either for the first lane only or
1224 // for all lanes, depending on its uses).
1226 // Returns a scalar boolean value, which is true if any lane of its single
1227 // operand is true.
1229 };
1230
1231private:
1232 typedef unsigned char OpcodeTy;
1233 OpcodeTy Opcode;
1234
1235 /// An optional name that can be used for the generated IR instruction.
1236 const std::string Name;
1237
1238 /// Returns true if this VPInstruction generates scalar values for all lanes.
1239 /// Most VPInstructions generate a single value per part, either vector or
1240 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1241 /// values per all lanes, stemming from an original ingredient. This method
1242 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1243 /// underlying ingredient.
1244 bool doesGeneratePerAllLanes() const;
1245
1246 /// Returns true if we can generate a scalar for the first lane only if
1247 /// needed.
1248 bool canGenerateScalarForFirstLane() const;
1249
1250 /// Utility methods serving execute(): generates a single vector instance of
1251 /// the modeled instruction. \returns the generated value. In some cases an
1252 /// existing value is returned rather than a generated one.
1253 Value *generate(VPTransformState &State);
1254
1255 /// Utility methods serving execute(): generates a scalar single instance of
1256 /// the modeled instruction for a given lane. \returns the scalar generated
1257 /// value for lane \p Lane.
1258 Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
1259
1260#if !defined(NDEBUG)
1261 /// Return true if the VPInstruction is a floating point math operation, i.e.
1262 /// has fast-math flags.
1263 bool isFPMathOp() const;
1264#endif
1265
1266public:
1268 const Twine &Name = "")
1269 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1270 Opcode(Opcode), Name(Name.str()) {}
1271
1272 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1273 DebugLoc DL = {}, const Twine &Name = "")
1275
1276 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1277 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1278
1279 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1280 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1281 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1282 Opcode(Opcode), Name(Name.str()) {}
1283
1284 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1285 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1286 const Twine &Name = "")
1287 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1288 Opcode(Opcode), Name(Name.str()) {
1289 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1290 }
1291
1293 DebugLoc DL = {}, const Twine &Name = "")
1294 : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
1295 ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
1296 Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
1297
1298 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1299 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1300
1301 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1302
1303 VPInstruction *clone() override {
1305 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1306 New->transferFlags(*this);
1307 return New;
1308 }
1309
1310 unsigned getOpcode() const { return Opcode; }
1311
1312 /// Generate the instruction.
1313 /// TODO: We currently execute only per-part unless a specific instance is
1314 /// provided.
1315 void execute(VPTransformState &State) override;
1316
1317 /// Return the cost of this VPInstruction.
1319 VPCostContext &Ctx) const override {
1320 // TODO: Compute accurate cost after retiring the legacy cost model.
1321 return 0;
1322 }
1323
1324#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1325 /// Print the VPInstruction to \p O.
1326 void print(raw_ostream &O, const Twine &Indent,
1327 VPSlotTracker &SlotTracker) const override;
1328
1329 /// Print the VPInstruction to dbgs() (for debugging).
1330 LLVM_DUMP_METHOD void dump() const;
1331#endif
1332
1333 bool hasResult() const {
1334 // CallInst may or may not have a result, depending on the called function.
1335 // Conservatively return calls have results for now.
1336 switch (getOpcode()) {
1337 case Instruction::Ret:
1338 case Instruction::Br:
1339 case Instruction::Store:
1340 case Instruction::Switch:
1341 case Instruction::IndirectBr:
1342 case Instruction::Resume:
1343 case Instruction::CatchRet:
1344 case Instruction::Unreachable:
1345 case Instruction::Fence:
1346 case Instruction::AtomicRMW:
1349 return false;
1350 default:
1351 return true;
1352 }
1353 }
1354
1355 /// Returns true if the underlying opcode may read from or write to memory.
1356 bool opcodeMayReadOrWriteFromMemory() const;
1357
1358 /// Returns true if the recipe only uses the first lane of operand \p Op.
1359 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1360
1361 /// Returns true if the recipe only uses the first part of operand \p Op.
1362 bool onlyFirstPartUsed(const VPValue *Op) const override;
1363
1364 /// Returns true if this VPInstruction produces a scalar value from a vector,
1365 /// e.g. by performing a reduction or extracting a lane.
1366 bool isVectorToScalar() const;
1367
1368 /// Returns true if this VPInstruction's operands are single scalars and the
1369 /// result is also a single scalar.
1370 bool isSingleScalar() const;
1371
1372 /// Returns the symbolic name assigned to the VPInstruction.
1373 StringRef getName() const { return Name; }
1374};
1375
1376 /// A recipe to wrap an original IR instruction not to be modified during
1377 /// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
1378 /// and it is used to add a new incoming value for the single predecessor VPBB.
1379 /// Except for PHIs, VPIRInstructions cannot have any operands.
1381 Instruction &I;
1382
1383public:
1385 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1386
1387 ~VPIRInstruction() override = default;
1388
1389 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1390
1392 auto *R = new VPIRInstruction(I);
1393 for (auto *Op : operands())
1394 R->addOperand(Op);
1395 return R;
1396 }
1397
1398 void execute(VPTransformState &State) override;
1399
1400 /// Return the cost of this VPIRInstruction.
1402 VPCostContext &Ctx) const override;
1403
1404 Instruction &getInstruction() const { return I; }
1405
1406#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1407 /// Print the recipe.
1408 void print(raw_ostream &O, const Twine &Indent,
1409 VPSlotTracker &SlotTracker) const override;
1410#endif
1411
1412 bool usesScalars(const VPValue *Op) const override {
1414 "Op must be an operand of the recipe");
1415 return true;
1416 }
1417
1418 bool onlyFirstPartUsed(const VPValue *Op) const override {
1420 "Op must be an operand of the recipe");
1421 return true;
1422 }
1423};
1424
1425/// VPWidenRecipe is a recipe for producing a widened instruction using the
1426/// opcode and operands of the recipe. This recipe covers most of the
1427/// traditional vectorization cases where each recipe transforms into a
1428/// vectorized version of itself.
1430 unsigned Opcode;
1431
1432protected:
1433 template <typename IterT>
1434 VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1436 : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1437
1438public:
1439 template <typename IterT>
1441 : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1442
1443 ~VPWidenRecipe() override = default;
1444
1445 VPWidenRecipe *clone() override {
1446 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1447 R->transferFlags(*this);
1448 return R;
1449 }
1450
1451 static inline bool classof(const VPRecipeBase *R) {
1452 return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1453 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1454 }
1455
1456 static inline bool classof(const VPUser *U) {
1457 auto *R = dyn_cast<VPRecipeBase>(U);
1458 return R && classof(R);
1459 }
1460
1461 /// Produce a widened instruction using the opcode and operands of the recipe,
1462 /// processing State.VF elements.
1463 void execute(VPTransformState &State) override;
1464
1465 /// Return the cost of this VPWidenRecipe.
1467 VPCostContext &Ctx) const override;
1468
1469 unsigned getOpcode() const { return Opcode; }
1470
1471#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1472 /// Print the recipe.
1473 void print(raw_ostream &O, const Twine &Indent,
1474 VPSlotTracker &SlotTracker) const override;
1475#endif
1476};
1477
1478/// A recipe for widening operations with vector-predication intrinsics with
1479/// explicit vector length (EVL).
1482
1483public:
1484 template <typename IterT>
1486 : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1487 addOperand(&EVL);
1488 }
1490 : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1491 transferFlags(W);
1492 }
1493
1494 ~VPWidenEVLRecipe() override = default;
1495
1496 VPWidenRecipe *clone() override final {
1497 llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1498 return nullptr;
1499 }
1500
1501 VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1502
1504 const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1505
1506 /// Produce a vp-intrinsic using the opcode and operands of the recipe,
1507 /// processing EVL elements.
1508 void execute(VPTransformState &State) override final;
1509
1510 /// Returns true if the recipe only uses the first lane of operand \p Op.
1511 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1513 "Op must be an operand of the recipe");
1514 // EVL in that recipe is always the last operand, thus any use before means
1515 // the VPValue should be vectorized.
1516 return getEVL() == Op;
1517 }
1518
1519#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1520 /// Print the recipe.
1521 void print(raw_ostream &O, const Twine &Indent,
1522 VPSlotTracker &SlotTracker) const override final;
1523#endif
1524};
1525
1526/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1528 /// Cast instruction opcode.
1529 Instruction::CastOps Opcode;
1530
1531 /// Result type for the cast.
1532 Type *ResultTy;
1533
1534public:
1536 CastInst &UI)
1537 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1538 ResultTy(ResultTy) {
1539 assert(UI.getOpcode() == Opcode &&
1540 "opcode of underlying cast doesn't match");
1541 }
1542
1544 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1545 ResultTy(ResultTy) {}
1546
1547 ~VPWidenCastRecipe() override = default;
1548
1550 if (auto *UV = getUnderlyingValue())
1551 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1552 *cast<CastInst>(UV));
1553
1554 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1555 }
1556
1557 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1558
1559 /// Produce widened copies of the cast.
1560 void execute(VPTransformState &State) override;
1561
1562 /// Return the cost of this VPWidenCastRecipe.
1564 VPCostContext &Ctx) const override;
1565
1566#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1567 /// Print the recipe.
1568 void print(raw_ostream &O, const Twine &Indent,
1569 VPSlotTracker &SlotTracker) const override;
1570#endif
1571
1572 Instruction::CastOps getOpcode() const { return Opcode; }
1573
1574 /// Returns the result type of the cast.
1575 Type *getResultType() const { return ResultTy; }
1576};
1577
1578/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1580 Instruction::CastOps Opcode;
1581
1582 Type *ResultTy;
1583
1584 Value *generate(VPTransformState &State);
1585
1586public:
1588 DebugLoc DL)
1589 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
1590 ResultTy(ResultTy) {}
1591
1592 ~VPScalarCastRecipe() override = default;
1593
1595 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
1596 getDebugLoc());
1597 }
1598
1599 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1600
1601 void execute(VPTransformState &State) override;
1602
1603 /// Return the cost of this VPScalarCastRecipe.
1605 VPCostContext &Ctx) const override {
1606 // TODO: Compute accurate cost after retiring the legacy cost model.
1607 return 0;
1608 }
1609
1610#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1611 void print(raw_ostream &O, const Twine &Indent,
1612 VPSlotTracker &SlotTracker) const override;
1613#endif
1614
1615 /// Returns the result type of the cast.
1616 Type *getResultType() const { return ResultTy; }
1617
1618 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1619 // At the moment, only uniform codegen is implemented.
1621 "Op must be an operand of the recipe");
1622 return true;
1623 }
1624};
1625
1626/// A recipe for widening vector intrinsics.
1628 /// ID of the vector intrinsic to widen.
1629 Intrinsic::ID VectorIntrinsicID;
1630
1631 /// Scalar return type of the intrinsic.
1632 Type *ResultTy;
1633
1634 /// True if the intrinsic may read from memory.
1635 bool MayReadFromMemory;
1636
1637 /// True if the intrinsic may write to memory.
1638 bool MayWriteToMemory;
1639
1640 /// True if the intrinsic may have side-effects.
1641 bool MayHaveSideEffects;
1642
1643public:
1645 ArrayRef<VPValue *> CallArguments, Type *Ty,
1646 DebugLoc DL = {})
1647 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1648 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1649 MayReadFromMemory(CI.mayReadFromMemory()),
1650 MayWriteToMemory(CI.mayWriteToMemory()),
1651 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1652
1654 ArrayRef<VPValue *> CallArguments, Type *Ty,
1655 DebugLoc DL = {})
1656 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1657 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
1658 LLVMContext &Ctx = Ty->getContext();
1659 AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
1660 MemoryEffects ME = Attrs.getMemoryEffects();
1661 MayReadFromMemory = ME.onlyWritesMemory();
1662 MayWriteToMemory = ME.onlyReadsMemory();
1663 MayHaveSideEffects = MayWriteToMemory ||
1664 !Attrs.hasFnAttr(Attribute::NoUnwind) ||
1665 !Attrs.hasFnAttr(Attribute::WillReturn);
1666 }
1667
1669 std::initializer_list<VPValue *> CallArguments,
1670 Type *Ty, DebugLoc DL = {})
1671 : VPWidenIntrinsicRecipe(VectorIntrinsicID,
1672 ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
1673
1674 ~VPWidenIntrinsicRecipe() override = default;
1675
1677 return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
1678 VectorIntrinsicID, {op_begin(), op_end()},
1679 ResultTy, getDebugLoc());
1680 }
1681
1682 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1683
1684 /// Produce a widened version of the vector intrinsic.
1685 void execute(VPTransformState &State) override;
1686
1687 /// Return the cost of this vector intrinsic.
1689 VPCostContext &Ctx) const override;
1690
1691 /// Return the ID of the intrinsic.
1692 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1693
1694 /// Return the scalar return type of the intrinsic.
1695 Type *getResultType() const { return ResultTy; }
1696
1697 /// Return the name of the intrinsic as string.
1699
1700 /// Returns true if the intrinsic may read from memory.
1701 bool mayReadFromMemory() const { return MayReadFromMemory; }
1702
1703 /// Returns true if the intrinsic may write to memory.
1704 bool mayWriteToMemory() const { return MayWriteToMemory; }
1705
1706 /// Returns true if the intrinsic may have side-effects.
1707 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1708
1709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1710 /// Print the recipe.
1711 void print(raw_ostream &O, const Twine &Indent,
1712 VPSlotTracker &SlotTracker) const override;
1713#endif
1714
1715 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1716};
1717
1718/// A recipe for widening Call instructions using library calls.
1720 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1721 /// between a given VF and the chosen vectorized variant, so there will be a
1722 /// different VPlan for each VF with a valid variant.
1723 Function *Variant;
1724
1725public:
1727 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1728 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1729 *cast<Instruction>(UV)),
1730 Variant(Variant) {
1731 assert(
1732 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1733 "last operand must be the called function");
1734 }
1735
1736 ~VPWidenCallRecipe() override = default;
1737
1739 return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1740 {op_begin(), op_end()}, getDebugLoc());
1741 }
1742
1743 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1744
1745 /// Produce a widened version of the call instruction.
1746 void execute(VPTransformState &State) override;
1747
1748 /// Return the cost of this VPWidenCallRecipe.
1750 VPCostContext &Ctx) const override;
1751
1753 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1754 }
1755
1757 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1758 }
1760 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1761 }
1762
1763#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1764 /// Print the recipe.
1765 void print(raw_ostream &O, const Twine &Indent,
1766 VPSlotTracker &SlotTracker) const override;
1767#endif
1768};
1769
1770/// A recipe representing a sequence of load -> update -> store as part of
1771/// a histogram operation. This means there may be aliasing between vector
1772/// lanes, which is handled by the llvm.experimental.vector.histogram family
1773/// of intrinsics. The only update operations currently supported are
1774/// 'add' and 'sub' where the other term is loop-invariant.
1776 /// Opcode of the update operation, currently either add or sub.
1777 unsigned Opcode;
1778
1779public:
1780 template <typename IterT>
1781 VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
1782 DebugLoc DL = {})
1783 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1784
1785 ~VPHistogramRecipe() override = default;
1786
1788 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1789 }
1790
1791 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1792
1793 /// Produce a vectorized histogram operation.
1794 void execute(VPTransformState &State) override;
1795
1796 /// Return the cost of this VPHistogramRecipe.
1798 VPCostContext &Ctx) const override;
1799
1800 unsigned getOpcode() const { return Opcode; }
1801
1802 /// Return the mask operand if one was provided, or a null pointer if all
1803 /// lanes should be executed unconditionally.
1804 VPValue *getMask() const {
1805 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1806 }
1807
1808#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1809 /// Print the recipe
1810 void print(raw_ostream &O, const Twine &Indent,
1811 VPSlotTracker &SlotTracker) const override;
1812#endif
1813};
1814
1815/// A recipe for widening select instructions.
1817 template <typename IterT>
1819 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1820 I.getDebugLoc()) {}
1821
1822 ~VPWidenSelectRecipe() override = default;
1823
1825 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1826 operands());
1827 }
1828
1829 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1830
1831 /// Produce a widened version of the select instruction.
1832 void execute(VPTransformState &State) override;
1833
1834 /// Return the cost of this VPWidenSelectRecipe.
1836 VPCostContext &Ctx) const override;
1837
1838#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1839 /// Print the recipe.
1840 void print(raw_ostream &O, const Twine &Indent,
1841 VPSlotTracker &SlotTracker) const override;
1842#endif
1843
1844 VPValue *getCond() const {
1845 return getOperand(0);
1846 }
1847
1848 bool isInvariantCond() const {
1850 }
1851};
1852
1853/// A recipe for handling GEP instructions.
1855 bool isPointerLoopInvariant() const {
1857 }
1858
1859 bool isIndexLoopInvariant(unsigned I) const {
1861 }
1862
1863 bool areAllOperandsInvariant() const {
1864 return all_of(operands(), [](VPValue *Op) {
1865 return Op->isDefinedOutsideLoopRegions();
1866 });
1867 }
1868
1869public:
1870 template <typename IterT>
1872 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1873
1874 ~VPWidenGEPRecipe() override = default;
1875
1877 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1878 operands());
1879 }
1880
1881 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1882
1883 /// Generate the gep nodes.
1884 void execute(VPTransformState &State) override;
1885
1886 /// Return the cost of this VPWidenGEPRecipe.
1888 VPCostContext &Ctx) const override {
1889 // TODO: Compute accurate cost after retiring the legacy cost model.
1890 return 0;
1891 }
1892
1893#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1894 /// Print the recipe.
1895 void print(raw_ostream &O, const Twine &Indent,
1896 VPSlotTracker &SlotTracker) const override;
1897#endif
1898};
1899
1900 /// A recipe to compute the pointers for widened memory accesses of IndexedTy
1901 /// in reverse order.
1903 public VPUnrollPartAccessor<2> {
1904 Type *IndexedTy;
1905
1906public:
1909 : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1910 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1911 IndexedTy(IndexedTy) {}
1912
1913 VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1914
1916 const VPValue *getVFValue() const { return getOperand(1); }
1917
1918 void execute(VPTransformState &State) override;
1919
1920 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1922 "Op must be an operand of the recipe");
1923 return true;
1924 }
1925
1926 /// Return the cost of this VPReverseVectorPointerRecipe.
1928 VPCostContext &Ctx) const override {
1929 // TODO: Compute accurate cost after retiring the legacy cost model.
1930 return 0;
1931 }
1932
1933 /// Returns true if the recipe only uses the first part of operand \p Op.
1934 bool onlyFirstPartUsed(const VPValue *Op) const override {
1936 "Op must be an operand of the recipe");
1937 assert(getNumOperands() <= 2 && "must have at most two operands");
1938 return true;
1939 }
1940
1943 IndexedTy, getGEPNoWrapFlags(),
1944 getDebugLoc());
1945 }
1946
1947#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1948 /// Print the recipe.
1949 void print(raw_ostream &O, const Twine &Indent,
1950 VPSlotTracker &SlotTracker) const override;
1951#endif
1952};
1953
1954 /// A recipe to compute the pointers for widened memory accesses of IndexedTy.
1956 public VPUnrollPartAccessor<1> {
1957 Type *IndexedTy;
1958
1959public:
1961 DebugLoc DL)
1962 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1963 GEPFlags, DL),
1964 IndexedTy(IndexedTy) {}
1965
1966 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1967
1968 void execute(VPTransformState &State) override;
1969
1970 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1972 "Op must be an operand of the recipe");
1973 return true;
1974 }
1975
1976 /// Returns true if the recipe only uses the first part of operand \p Op.
1977 bool onlyFirstPartUsed(const VPValue *Op) const override {
1979 "Op must be an operand of the recipe");
1980 assert(getNumOperands() <= 2 && "must have at most two operands");
1981 return true;
1982 }
1983
1985 return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
1987 }
1988
1989 /// Return the cost of this VPVectorPointerRecipe.
1991 VPCostContext &Ctx) const override {
1992 // TODO: Compute accurate cost after retiring the legacy cost model.
1993 return 0;
1994 }
1995
1996#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1997 /// Print the recipe.
1998 void print(raw_ostream &O, const Twine &Indent,
1999 VPSlotTracker &SlotTracker) const override;
2000#endif
2001};
2002
2003/// A pure virtual base class for all recipes modeling header phis, including
2004/// phis for first order recurrences, pointer inductions and reductions. The
2005/// start value is the first operand of the recipe and the incoming value from
2006/// the backedge is the second operand.
2007///
2008/// Inductions are modeled using the following sub-classes:
2009/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2010/// starting at a specified value (zero for the main vector loop, the resume
2011/// value for the epilogue vector loop) and stepping by 1. The induction
2012/// controls exiting of the vector loop by comparing against the vector trip
2013/// count. Produces a single scalar PHI for the induction value per
2014/// iteration.
2015/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2016/// floating point inductions with arbitrary start and step values. Produces
2017/// a vector PHI per-part.
2018/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2019/// value of an IV with different start and step values. Produces a single
2020/// scalar value per iteration
2021/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2022/// canonical or derived induction.
2023/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2024/// pointer induction. Produces either a vector PHI per-part or scalar values
2025/// per-lane based on the canonical induction.
2027protected:
2028 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2029 VPValue *Start = nullptr, DebugLoc DL = {})
2030 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
2031 if (Start)
2032 addOperand(Start);
2033 }
2034
2035public:
2036 ~VPHeaderPHIRecipe() override = default;
2037
2038 /// Method to support type inquiry through isa, cast, and dyn_cast.
2039 static inline bool classof(const VPRecipeBase *B) {
2040 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2041 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2042 }
2043 static inline bool classof(const VPValue *V) {
2044 auto *B = V->getDefiningRecipe();
2045 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2046 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2047 }
2048
2049 /// Generate the phi nodes.
2050 void execute(VPTransformState &State) override = 0;
2051
2052 /// Return the cost of this header phi recipe.
2054 VPCostContext &Ctx) const override;
2055
2056#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2057 /// Print the recipe.
2058 void print(raw_ostream &O, const Twine &Indent,
2059 VPSlotTracker &SlotTracker) const override = 0;
2060#endif
2061
2062 /// Returns the start value of the phi, if one is set.
2064 return getNumOperands() == 0 ? nullptr : getOperand(0);
2065 }
2067 return getNumOperands() == 0 ? nullptr : getOperand(0);
2068 }
2069
2070 /// Update the start value of the recipe.
2072
2073 /// Returns the incoming value from the loop backedge.
2075 return getOperand(1);
2076 }
2077
2078 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2079 /// to be a recipe.
2082 }
2083};
2084
2085/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2086/// VPWidenPointerInductionRecipe), providing shared functionality, including
2087/// retrieving the step value, induction descriptor and original phi node.
2089 const InductionDescriptor &IndDesc;
2090
2091public:
/// Forwards \p Kind, \p IV, \p Start and \p DL to VPHeaderPHIRecipe and keeps
/// a reference to \p IndDesc. \p Step is appended as an operand once the base
/// constructor has run; getStepValue() reads it back from operand 1.
/// NOTE(review): that index presumes \p Start is non-null, since the base
/// constructor only adds \p Start when it is set -- confirm with callers.
2092 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2093 VPValue *Step, const InductionDescriptor &IndDesc,
2094 DebugLoc DL)
2095 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2096 addOperand(Step);
2097 }
2098
2099 static inline bool classof(const VPRecipeBase *R) {
2100 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2101 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2102 }
2103
2104 static inline bool classof(const VPValue *V) {
2105 auto *R = V->getDefiningRecipe();
2106 return R && classof(R);
2107 }
2108
2109 static inline bool classof(const VPHeaderPHIRecipe *R) {
2110 return classof(static_cast<const VPRecipeBase *>(R));
2111 }
2112
2113 virtual void execute(VPTransformState &State) override = 0;
2114
2115 /// Returns the step value of the induction.
2117 const VPValue *getStepValue() const { return getOperand(1); }
2118
2119 PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
2120
2121 /// Returns the induction descriptor for the recipe.
2122 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2123
2125 // TODO: All operands of base recipe must exist and be at same index in
2126 // derived recipe.
2128 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2129 }
2130
2132 // TODO: All operands of base recipe must exist and be at same index in
2133 // derived recipe.
2135 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2136 }
2137};
2138
2139/// A recipe for handling phi nodes of integer and floating-point inductions,
2140/// producing their vector values.
2142 TruncInst *Trunc;
2143
2144public:
2146 VPValue *VF, const InductionDescriptor &IndDesc,
2147 DebugLoc DL)
2148 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2149 Step, IndDesc, DL),
2150 Trunc(nullptr) {
2151 addOperand(VF);
2152 }
2153
2155 VPValue *VF, const InductionDescriptor &IndDesc,
2156 TruncInst *Trunc, DebugLoc DL)
2157 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2158 Step, IndDesc, DL),
2159 Trunc(Trunc) {
2160 addOperand(VF);
2161 }
2162
2164
2169 }
2170
2171 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2172
2173 /// Generate the vectorized and scalarized versions of the phi node as
2174 /// needed by their users.
2175 void execute(VPTransformState &State) override;
2176
2177#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2178 /// Print the recipe.
2179 void print(raw_ostream &O, const Twine &Indent,
2180 VPSlotTracker &SlotTracker) const override;
2181#endif
2182
2184 const VPValue *getVFValue() const { return getOperand(2); }
2185
2187 // If the recipe has been unrolled (5 operands), return the VPValue for the
2188 // induction increment.
2189 return getNumOperands() == 5 ? getOperand(3) : nullptr;
2190 }
2191
2192 /// Returns the first defined value as TruncInst, if it is one or nullptr
2193 /// otherwise.
2194 TruncInst *getTruncInst() { return Trunc; }
2195 const TruncInst *getTruncInst() const { return Trunc; }
2196
2197 /// Returns true if the induction is canonical, i.e. starting at 0 and
2198 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2199 /// same type as the canonical induction.
2200 bool isCanonical() const;
2201
2202 /// Returns the scalar type of the induction.
2204 return Trunc ? Trunc->getType() : getPHINode()->getType();
2205 }
2206
2207 /// Returns the VPValue representing the value of this induction at
2208 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2209 /// take place.
2211 return getNumOperands() == 5 ? getOperand(4) : this;
2212 }
2213};
2214
2216 public VPUnrollPartAccessor<3> {
2217 bool IsScalarAfterVectorization;
2218
2219public:
2220 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2221 /// Start.
2223 const InductionDescriptor &IndDesc,
2224 bool IsScalarAfterVectorization, DebugLoc DL)
2225 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2226 Step, IndDesc, DL),
2227 IsScalarAfterVectorization(IsScalarAfterVectorization) {}
2228
2230
2233 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2234 getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
2235 }
2236
2237 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2238
2239 /// Generate vector values for the pointer induction.
2240 void execute(VPTransformState &State) override;
2241
2242 /// Returns true if only scalar values will be generated.
2243 bool onlyScalarsGenerated(bool IsScalable);
2244
2245 /// Returns the VPValue representing the value of this induction at
2246 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2247 /// take place.
2249 return getUnrollPart(*this) == 0 ? this : getOperand(2);
2250 }
2251
2252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2253 /// Print the recipe.
2254 void print(raw_ostream &O, const Twine &Indent,
2255 VPSlotTracker &SlotTracker) const override;
2256#endif
2257};
2258
2259/// Recipe to generate a scalar PHI. Used to generate code for recipes that
2260/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
2261/// VPEVLBasedIVPHIRecipe.
2263 std::string Name;
2264
2265public:
/// Create a scalar phi recipe. \p Start and \p DL are forwarded to
/// VPHeaderPHIRecipe together with a null underlying instruction, then
/// \p BackedgeValue is appended as a further operand. \p Name is copied into
/// the recipe's own std::string.
2266 VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
2267 StringRef Name)
2268 : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
2269 Name(Name.str()) {
2270 addOperand(BackedgeValue);
2271 }
2272
2273 ~VPScalarPHIRecipe() override = default;
2274
2276 llvm_unreachable("cloning not implemented yet");
2277 }
2278
2279 VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
2280
2281 /// Generate the phi/select nodes.
2282 void execute(VPTransformState &State) override;
2283
2284 /// Returns true if the recipe only uses the first lane of operand \p Op.
2285 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2287 "Op must be an operand of the recipe");
2288 return true;
2289 }
2290
2291#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2292 /// Print the recipe.
2293 void print(raw_ostream &O, const Twine &Indent,
2294 VPSlotTracker &SlotTracker) const override;
2295#endif
2296};
2297
2298/// A recipe for handling phis that are widened in the vector loop.
2299/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
2300/// managed in the recipe directly.
2302 /// List of incoming blocks. Only used in the VPlan native path.
2303 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
2304
2305public:
2306 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
2307 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
2308 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
2309 if (Start)
2310 addOperand(Start);
2311 }
2312
2314 llvm_unreachable("cloning not implemented yet");
2315 }
2316
2317 ~VPWidenPHIRecipe() override = default;
2318
2319 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2320
2321 /// Generate the phi/select nodes.
2322 void execute(VPTransformState &State) override;
2323
2324#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2325 /// Print the recipe.
2326 void print(raw_ostream &O, const Twine &Indent,
2327 VPSlotTracker &SlotTracker) const override;
2328#endif
2329
2330 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
/// The value is appended to the operand list and the block to IncomingBlocks.
/// NOTE(review): the two lists only line up index-for-index if the
/// constructor did not already add a start operand -- confirm with callers of
/// getIncomingBlock()/getIncomingValue().
2331 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
2332 addOperand(IncomingV);
2333 IncomingBlocks.push_back(IncomingBlock);
2334 }
2335
2336 /// Returns the \p I th incoming VPBasicBlock.
2337 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
2338
2339 /// Returns the \p I th incoming VPValue.
2340 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
2341};
2342
2343/// A recipe for handling first-order recurrence phis. The start value is the
2344/// first operand of the recipe and the incoming value from the backedge is the
2345/// second operand.
2348 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2349
2350 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2351
2353 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
2354 }
2355
2358 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2359 }
2360
2361 void execute(VPTransformState &State) override;
2362
2363 /// Return the cost of this first-order recurrence phi recipe.
2365 VPCostContext &Ctx) const override;
2366
2367#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2368 /// Print the recipe.
2369 void print(raw_ostream &O, const Twine &Indent,
2370 VPSlotTracker &SlotTracker) const override;
2371#endif
2372};
2373
2374/// A recipe for handling reduction phis. The start value is the first operand
2375/// of the recipe and the incoming value from the backedge is the second
2376/// operand.
2378 public VPUnrollPartAccessor<2> {
2379 /// Descriptor for the reduction.
2380 const RecurrenceDescriptor &RdxDesc;
2381
2382 /// The phi is part of an in-loop reduction.
2383 bool IsInLoop;
2384
2385 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2386 bool IsOrdered;
2387
2388 /// When expanding the reduction PHI, the plan's VF element count is divided
2389 /// by this factor to form the reduction phi's VF.
2390 unsigned VFScaleFactor = 1;
2391
2392public:
2393 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2394 /// RdxDesc.
2396 VPValue &Start, bool IsInLoop = false,
2397 bool IsOrdered = false, unsigned VFScaleFactor = 1)
2398 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2399 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered),
2400 VFScaleFactor(VFScaleFactor) {
2401 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2402 }
2403
2404 ~VPReductionPHIRecipe() override = default;
2405
2407 auto *R = new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()),
2408 RdxDesc, *getOperand(0), IsInLoop,
2409 IsOrdered, VFScaleFactor);
2410 R->addOperand(getBackedgeValue());
2411 return R;
2412 }
2413
2414 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2415
2417 return R->getVPDefID() == VPDef::VPReductionPHISC;
2418 }
2419
2420 /// Generate the phi/select nodes.
2421 void execute(VPTransformState &State) override;
2422
2423#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2424 /// Print the recipe.
2425 void print(raw_ostream &O, const Twine &Indent,
2426 VPSlotTracker &SlotTracker) const override;
2427#endif
2428
2430 return RdxDesc;
2431 }
2432
2433 /// Returns true, if the phi is part of an ordered reduction.
2434 bool isOrdered() const { return IsOrdered; }
2435
2436 /// Returns true, if the phi is part of an in-loop reduction.
2437 bool isInLoop() const { return IsInLoop; }
2438};
2439
2440/// A recipe for forming partial reductions. In the loop, an accumulator and
2441/// vector operand are added together and passed to the next iteration as the
2442/// next accumulator. After the loop body, the accumulator is reduced to a
2443/// scalar value.
2445 unsigned Opcode;
2446
2447public:
2449 VPValue *Op1)
2450 : VPPartialReductionRecipe(ReductionInst->getOpcode(), Op0, Op1,
2451 ReductionInst) {}
/// Create a partial reduction with operands {\p Op0, \p Op1} and binary
/// opcode \p Opcode. \p ReductionInst (null by default) is passed on to
/// VPSingleDefRecipe. Operand 1 is asserted to be defined by a
/// VPReductionPHIRecipe.
2452 VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1,
2453 Instruction *ReductionInst = nullptr)
2454 : VPSingleDefRecipe(VPDef::VPPartialReductionSC,
2455 ArrayRef<VPValue *>({Op0, Op1}), ReductionInst),
2456 Opcode(Opcode) {
// NOTE(review): other code in this file null-checks getDefiningRecipe()
// before using it; confirm \p Op1 always has a defining recipe here.
2457 assert(isa<VPReductionPHIRecipe>(getOperand(1)->getDefiningRecipe()) &&
2458 "Unexpected operand order for partial reduction recipe");
2459 }
2460 ~VPPartialReductionRecipe() override = default;
2461
2463 return new VPPartialReductionRecipe(Opcode, getOperand(0), getOperand(1));
2464 }
2465
2466 VP_CLASSOF_IMPL(VPDef::VPPartialReductionSC)
2467
2468 /// Generate the reduction in the loop.
2469 void execute(VPTransformState &State) override;
2470
2471 /// Return the cost of this VPPartialReductionRecipe.
2473 VPCostContext &Ctx) const override;
2474
2475 /// Get the binary op's opcode.
2476 unsigned getOpcode() const { return Opcode; }
2477
2478#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2479 /// Print the recipe.
2480 void print(raw_ostream &O, const Twine &Indent,
2481 VPSlotTracker &SlotTracker) const override;
2482#endif
2483};
2484
2485/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2486/// instructions.
2488public:
2489 /// The blend operation is a User of the incoming values and of their
2490 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2491 /// be omitted (implied by passing an odd number of operands) in which case
2492 /// all other incoming values are merged into it.
2494 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2495 assert(Operands.size() > 0 && "Expected at least one operand!");
2496 }
2497
2498 VPBlendRecipe *clone() override {
2500 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2501 }
2502
2503 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2504
2505 /// A normalized blend is one that has an odd number of operands, whereby the
2506 /// first operand does not have an associated mask.
2507 bool isNormalized() const { return getNumOperands() % 2; }
2508
2509 /// Return the number of incoming values, taking into account when normalized
2510 /// the first incoming value will have no mask.
2511 unsigned getNumIncomingValues() const {
2512 return (getNumOperands() + isNormalized()) / 2;
2513 }
2514
2515 /// Return incoming value number \p Idx.
2516 VPValue *getIncomingValue(unsigned Idx) const {
2517 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2518 }
2519
2520 /// Return mask number \p Idx.
2521 VPValue *getMask(unsigned Idx) const {
2522 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2523 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2524 }
2525
2526 /// Generate the phi/select nodes.
2527 void execute(VPTransformState &State) override;
2528
2529 /// Return the cost of this VPBlendRecipe.
2531 VPCostContext &Ctx) const override;
2532
2533#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2534 /// Print the recipe.
2535 void print(raw_ostream &O, const Twine &Indent,
2536 VPSlotTracker &SlotTracker) const override;
2537#endif
2538
2539 /// Returns true if the recipe only uses the first lane of operand \p Op.
2540 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2542 "Op must be an operand of the recipe");
2543 // Recursing through Blend recipes only, must terminate at header phis at
2544 // the latest.
2545 return all_of(users(),
2546 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2547 }
2548};
2549
2550/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2551/// or stores into one wide load/store and shuffles. The first operand of a
2552/// VPInterleave recipe is the address, followed by the stored values, followed
2553/// by an optional mask.
2556
2557 /// Indicates if the interleave group is in a conditional block and requires a
2558 /// mask.
2559 bool HasMask = false;
2560
2561 /// Indicates if gaps between members of the group need to be masked out or if
2562 /// unused gaps can be loaded speculatively.
2563 bool NeedsMaskForGaps = false;
2564
2565public:
2567 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2568 bool NeedsMaskForGaps)
2569 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2570 NeedsMaskForGaps(NeedsMaskForGaps) {
2571 for (unsigned i = 0; i < IG->getFactor(); ++i)
2572 if (Instruction *I = IG->getMember(i)) {
2573 if (I->getType()->isVoidTy())
2574 continue;
2575 new VPValue(I, this);
2576 }
2577
2578 for (auto *SV : StoredValues)
2579 addOperand(SV);
2580 if (Mask) {
2581 HasMask = true;
2582 addOperand(Mask);
2583 }
2584 }
2585 ~VPInterleaveRecipe() override = default;
2586
2588 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2589 NeedsMaskForGaps);
2590 }
2591
2592 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2593
2594 /// Return the address accessed by this recipe.
2595 VPValue *getAddr() const {
2596 return getOperand(0); // Address is the 1st, mandatory operand.
2597 }
2598
2599 /// Return the mask used by this recipe. Note that a full mask is represented
2600 /// by a nullptr.
2601 VPValue *getMask() const {
2602 // Mask is optional and, when present, is always the last operand: the
// constructor appends it after the address and any stored values.
2603 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2604 }
2605
2606 /// Return the VPValues stored by this interleave group. If it is a load
2607 /// interleave group, return an empty ArrayRef.
2609 // The first operand is the address, followed by the stored values, followed
2610 // by an optional mask.
2613 }
2614
2615 /// Generate the wide load or store, and shuffles.
2616 void execute(VPTransformState &State) override;
2617
2618 /// Return the cost of this VPInterleaveRecipe.
2620 VPCostContext &Ctx) const override;
2621
2622#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2623 /// Print the recipe.
2624 void print(raw_ostream &O, const Twine &Indent,
2625 VPSlotTracker &SlotTracker) const override;
2626#endif
2627
2629
2630 /// Returns the number of stored operands of this interleave group. Returns 0
2631 /// for load interleave groups.
2632 unsigned getNumStoreOperands() const {
2633 return getNumOperands() - (HasMask ? 2 : 1);
2634 }
2635
2636 /// The recipe only uses the first lane of the address.
2637 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2639 "Op must be an operand of the recipe");
2640 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2641 }
2642
2643 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2644};
2645
2646/// A recipe to represent inloop reduction operations, performing a reduction on
2647/// a vector operand into a scalar value, and adding the result to a chain.
2648/// The Operands are {ChainOp, VecOp, [Condition]}.
2650 /// The recurrence descriptor for the reduction in question.
2651 const RecurrenceDescriptor &RdxDesc;
2652 bool IsOrdered;
2653 /// Whether the reduction is conditional.
2654 bool IsConditional = false;
2655
2656protected:
2657 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2659 VPValue *CondOp, bool IsOrdered, DebugLoc DL)
2660 : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
2661 IsOrdered(IsOrdered) {
2662 if (CondOp) {
2663 IsConditional = true;
2664 addOperand(CondOp);
2665 }
2666 }
2667
2668public:
2670 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2671 bool IsOrdered, DebugLoc DL = {})
2672 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2673 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2674 IsOrdered, DL) {}
2675
2676 ~VPReductionRecipe() override = default;
2677
2679 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2680 getVecOp(), getCondOp(), IsOrdered,
2681 getDebugLoc());
2682 }
2683
2684 static inline bool classof(const VPRecipeBase *R) {
2685 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2686 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2687 }
2688
2689 static inline bool classof(const VPUser *U) {
2690 auto *R = dyn_cast<VPRecipeBase>(U);
2691 return R && classof(R);
2692 }
2693
2694 /// Generate the reduction in the loop.
2695 void execute(VPTransformState &State) override;
2696
2697 /// Return the cost of VPReductionRecipe.
2699 VPCostContext &Ctx) const override;
2700
2701#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2702 /// Print the recipe.
2703 void print(raw_ostream &O, const Twine &Indent,
2704 VPSlotTracker &SlotTracker) const override;
2705#endif
2706
2707 /// Return the recurrence descriptor for the in-loop reduction.
2709 return RdxDesc;
2710 }
2711 /// Return true if the in-loop reduction is ordered.
2712 bool isOrdered() const { return IsOrdered; };
2713 /// Return true if the in-loop reduction is conditional.
2714 bool isConditional() const { return IsConditional; };
2715 /// The VPValue of the scalar Chain being accumulated.
2716 VPValue *getChainOp() const { return getOperand(0); }
2717 /// The VPValue of the vector value to be reduced.
2718 VPValue *getVecOp() const { return getOperand(1); }
2719 /// The VPValue of the condition for the block.
2721 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2722 }
2723};
2724
2725/// A recipe to represent inloop reduction operations with vector-predication
2726/// intrinsics, performing a reduction on a vector operand with the explicit
2727/// vector length (EVL) into a scalar value, and adding the result to a chain.
2728/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2730public:
2733 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2735 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2736 R.isOrdered(), R.getDebugLoc()) {}
2737
2738 ~VPReductionEVLRecipe() override = default;
2739
2741 llvm_unreachable("cloning not implemented yet");
2742 }
2743
2744 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2745
2746 /// Generate the reduction in the loop
2747 void execute(VPTransformState &State) override;
2748
2749#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2750 /// Print the recipe.
2751 void print(raw_ostream &O, const Twine &Indent,
2752 VPSlotTracker &SlotTracker) const override;
2753#endif
2754
2755 /// The VPValue of the explicit vector length.
2756 VPValue *getEVL() const { return getOperand(2); }
2757
2758 /// Returns true if the recipe only uses the first lane of operand \p Op.
2759 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2761 "Op must be an operand of the recipe");
2762 return Op == getEVL();
2763 }
2764};
2765
2766/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2767/// copies of the original scalar type, one per lane, instead of producing a
2768/// single copy of widened type for all lanes. If the instruction is known to be
2769/// uniform only one copy, per lane zero, will be generated.
2771 /// Indicator if only a single replica per lane is needed.
2772 bool IsUniform;
2773
2774 /// Indicator if the replicas are also predicated.
2775 bool IsPredicated;
2776
2777public:
2778 template <typename IterT>
2780 bool IsUniform, VPValue *Mask = nullptr)
2781 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2782 IsUniform(IsUniform), IsPredicated(Mask) {
2783 if (Mask)
2784 addOperand(Mask);
2785 }
2786
2787 ~VPReplicateRecipe() override = default;
2788
2790 auto *Copy =
2791 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2792 isPredicated() ? getMask() : nullptr);
2793 Copy->transferFlags(*this);
2794 return Copy;
2795 }
2796
2797 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2798
2799 /// Generate replicas of the desired Ingredient. Replicas will be generated
2800 /// for all parts and lanes unless a specific part and lane are specified in
2801 /// the \p State.
2802 void execute(VPTransformState &State) override;
2803
2804 /// Return the cost of this VPReplicateRecipe.
2806 VPCostContext &Ctx) const override;
2807
2808#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2809 /// Print the recipe.
2810 void print(raw_ostream &O, const Twine &Indent,
2811 VPSlotTracker &SlotTracker) const override;
2812#endif
2813
2814 bool isUniform() const { return IsUniform; }
2815
2816 bool isPredicated() const { return IsPredicated; }
2817
2818 /// Returns true if the recipe only uses the first lane of operand \p Op.
2819 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2821 "Op must be an operand of the recipe");
2822 return isUniform();
2823 }
2824
2825 /// Returns true if the recipe uses scalars of operand \p Op.
2826 bool usesScalars(const VPValue *Op) const override {
2828 "Op must be an operand of the recipe");
2829 return true;
2830 }
2831
2832 /// Returns true if the recipe is used by a widened recipe via an intervening
2833 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2834 /// in a vector.
2835 bool shouldPack() const;
2836
2837 /// Return the mask of a predicated VPReplicateRecipe.
2839 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2840 return getOperand(getNumOperands() - 1);
2841 }
2842
2843 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2844};
2845
2846/// A recipe for generating conditional branches on the bits of a mask.
2848public:
2850 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2851 if (BlockInMask) // nullptr means all-one mask.
2852 addOperand(BlockInMask);
2853 }
2854
2856 return new VPBranchOnMaskRecipe(getOperand(0));
2857 }
2858
2859 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2860
2861 /// Generate the extraction of the appropriate bit from the block mask and the
2862 /// conditional branch.
2863 void execute(VPTransformState &State) override;
2864
2865 /// Return the cost of this VPBranchOnMaskRecipe.
2867 VPCostContext &Ctx) const override;
2868
2869#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2870 /// Print the recipe.
2871 void print(raw_ostream &O, const Twine &Indent,
2872 VPSlotTracker &SlotTracker) const override {
2873 O << Indent << "BRANCH-ON-MASK ";
2874 if (VPValue *Mask = getMask())
2875 Mask->printAsOperand(O, SlotTracker);
2876 else
2877 O << " All-One";
2878 }
2879#endif
2880
2881 /// Return the mask used by this recipe. Note that a full mask is represented
2882 /// by a nullptr.
2883 VPValue *getMask() const {
2884 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2885 // Mask is optional.
2886 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2887 }
2888
2889 /// Returns true if the recipe uses scalars of operand \p Op.
2890 bool usesScalars(const VPValue *Op) const override {
2892 "Op must be an operand of the recipe");
2893 return true;
2894 }
2895};
2896
2897/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2898/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2899/// order to merge values that are set under such a branch and feed their uses.
2900/// The phi nodes can be scalar or vector depending on the users of the value.
2901/// This recipe works in concert with VPBranchOnMaskRecipe.
2903public:
2904 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2905 /// nodes after merging back from a Branch-on-Mask.
2907 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
2908 ~VPPredInstPHIRecipe() override = default;
2909
2911 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
2912 }
2913
2914 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2915
2916 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2917 /// retain SSA form.
2918 void execute(VPTransformState &State) override;
2919
2920 /// Return the cost of this VPPredInstPHIRecipe.
2922 VPCostContext &Ctx) const override {
2923 // TODO: Compute accurate cost after retiring the legacy cost model.
2924 return 0;
2925 }
2926
2927#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2928 /// Print the recipe.
2929 void print(raw_ostream &O, const Twine &Indent,
2930 VPSlotTracker &SlotTracker) const override;
2931#endif
2932
2933 /// Returns true if the recipe uses scalars of operand \p Op.
2934 bool usesScalars(const VPValue *Op) const override {
2936 "Op must be an operand of the recipe");
2937 return true;
2938 }
2939};
2940
2941/// A common base class for widening memory operations. An optional mask can be
2942/// provided as the last operand.
2944protected:
2946
2947 /// Whether the accessed addresses are consecutive.
2949
2950 /// Whether the consecutive accessed addresses are in reverse order.
2952
2953 /// Whether the memory access is masked.
2954 bool IsMasked = false;
2955
2956 void setMask(VPValue *Mask) {
2957 assert(!IsMasked && "cannot re-set mask");
2958 if (!Mask)
2959 return;
2960 addOperand(Mask);
2961 IsMasked = true;
2962 }
2963
2964 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2965 std::initializer_list<VPValue *> Operands,
2966 bool Consecutive, bool Reverse, DebugLoc DL)
2968 Reverse(Reverse) {
2969 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2970 }
2971
2972public:
2974 llvm_unreachable("cloning not supported");
2975 }
2976
2977 static inline bool classof(const VPRecipeBase *R) {
2978 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2979 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2980 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2981 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2982 }
2983
2984 static inline bool classof(const VPUser *U) {
2985 auto *R = dyn_cast<VPRecipeBase>(U);
2986 return R && classof(R);
2987 }
2988
2989 /// Return whether the loaded-from / stored-to addresses are consecutive.
2990 bool isConsecutive() const { return Consecutive; }
2991
2992 /// Return whether the consecutive loaded/stored addresses are in reverse
2993 /// order.
2994 bool isReverse() const { return Reverse; }
2995
2996 /// Return the address accessed by this recipe.
2997 VPValue *getAddr() const { return getOperand(0); }
2998
2999 /// Returns true if the recipe is masked.
3000 bool isMasked() const { return IsMasked; }
3001
3002 /// Return the mask used by this recipe. Note that a full mask is represented
3003 /// by a nullptr.
3004 VPValue *getMask() const {
3005 // Mask is optional and therefore the last operand.
3006 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
3007 }
3008
3009 /// Generate the wide load/store. This base-class implementation must never
/// be reached (it is marked unreachable); the concrete load/store recipes
/// declare their own execute().
3010 void execute(VPTransformState &State) override {
3011 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
3012 }
3013
3014 /// Return the cost of this VPWidenMemoryRecipe.
3016 VPCostContext &Ctx) const override;
3017
3019};
3020
3021/// A recipe for widening load operations, using the address to load from and an
3022/// optional mask.
3023struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
3025 bool Consecutive, bool Reverse, DebugLoc DL)
3026 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
3027 Reverse, DL),
3028 VPValue(this, &Load) {
3029 setMask(Mask);
3030 }
3031
3033 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
3035 getDebugLoc());
3036 }
3037
3038 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
3039
3040 /// Generate a wide load or gather.
3041 void execute(VPTransformState &State) override;
3042
3043#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3044 /// Print the recipe.
3045 void print(raw_ostream &O, const Twine &Indent,
3046 VPSlotTracker &SlotTracker) const override;
3047#endif
3048
3049 /// Returns true if the recipe only uses the first lane of operand \p Op.
3050 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3052 "Op must be an operand of the recipe");
3053 // Widened, consecutive load operations only demand the first lane of
3054 // their address.
3055 return Op == getAddr() && isConsecutive();
3056 }
3057};
3058
3059/// A recipe for widening load operations with vector-predication intrinsics,
3060/// using the address to load from, the explicit vector length and an optional
3061/// mask.
3062struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3064 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3065 {L.getAddr(), &EVL}, L.isConsecutive(),
3066 L.isReverse(), L.getDebugLoc()),
3067 VPValue(this, &getIngredient()) {
3068 setMask(Mask);
3069 }
3070
3071 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3072
3073 /// Return the EVL operand.
3074 VPValue *getEVL() const { return getOperand(1); }
3075
3076 /// Generate the wide load or gather.
3077 void execute(VPTransformState &State) override;
3078
3079 /// Return the cost of this VPWidenLoadEVLRecipe.
3081 VPCostContext &Ctx) const override;
3082
3083#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3084 /// Print the recipe.
3085 void print(raw_ostream &O, const Twine &Indent,
3086 VPSlotTracker &SlotTracker) const override;
3087#endif
3088
3089 /// Returns true if the recipe only uses the first lane of operand \p Op.
3090 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3092 "Op must be an operand of the recipe");
3093 // Widened loads only demand the first lane of EVL and consecutive loads
3094 // only demand the first lane of their address.
3095 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3096 }
3097};
3098
3099/// A recipe for widening store operations, using the stored value, the address
3100/// to store to and an optional mask.
3103 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
3104 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3106 setMask(Mask);
3107 }
3108
3110 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3112 Reverse, getDebugLoc());
3113 }
3114
3115 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3116
3117 /// Return the value stored by this recipe.
3118 VPValue *getStoredValue() const { return getOperand(1); }
3119
3120 /// Generate a wide store or scatter.
3121 void execute(VPTransformState &State) override;
3122
3123#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3124 /// Print the recipe.
3125 void print(raw_ostream &O, const Twine &Indent,
3126 VPSlotTracker &SlotTracker) const override;
3127#endif
3128
3129 /// Returns true if the recipe only uses the first lane of operand \p Op.
3130 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3132 "Op must be an operand of the recipe");
3133 // Widened, consecutive stores only demand the first lane of their address,
3134 // unless the same operand is also stored.
3135 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3136 }
3137};
3138
3139/// A recipe for widening store operations with vector-predication intrinsics,
3140/// using the value to store, the address to store to, the explicit vector
3141/// length and an optional mask.
3144 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3145 {S.getAddr(), S.getStoredValue(), &EVL},
3146 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
3147 setMask(Mask);
3148 }
3149
3150 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3151
3152 /// Return the value stored by this recipe.
3153 VPValue *getStoredValue() const { return getOperand(1); }
3154
3155 /// Return the EVL operand.
3156 VPValue *getEVL() const { return getOperand(2); }
3157
3158 /// Generate the wide store or scatter.
3159 void execute(VPTransformState &State) override;
3160
3161 /// Return the cost of this VPWidenStoreEVLRecipe.
3163 VPCostContext &Ctx) const override;
3164
3165#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3166 /// Print the recipe.
3167 void print(raw_ostream &O, const Twine &Indent,
3168 VPSlotTracker &SlotTracker) const override;
3169#endif
3170
3171 /// Returns true if the recipe only uses the first lane of operand \p Op.
3172 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3174 "Op must be an operand of the recipe");
3175 if (Op == getEVL()) {
3176 assert(getStoredValue() != Op && "unexpected store of EVL");
3177 return true;
3178 }
3179 // Widened, consecutive memory operations only demand the first lane of
3180 // their address, unless the same operand is also stored. The latter can
3181 // happen with opaque pointers.
3182 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3183 }
3184};
3185
3186/// Recipe to expand a SCEV expression.
3188 const SCEV *Expr;
3189 ScalarEvolution &SE;
3190
3191public:
3193 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3194
3195 ~VPExpandSCEVRecipe() override = default;
3196
3198 return new VPExpandSCEVRecipe(Expr, SE);
3199 }
3200
3201 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3202
3203 /// Execute the recipe; per the class description this expands the SCEV
/// expression Expr (the definition is not part of this declaration).
3204 void execute(VPTransformState &State) override;
3205
3206 /// Return the cost of this VPExpandSCEVRecipe.
3208 VPCostContext &Ctx) const override {
3209 // TODO: Compute accurate cost after retiring the legacy cost model.
3210 return 0;
3211 }
3212
3213#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3214 /// Print the recipe.
3215 void print(raw_ostream &O, const Twine &Indent,
3216 VPSlotTracker &SlotTracker) const override;
3217#endif
3218
/// Return the SCEV expression held by this recipe.
3219 const SCEV *getSCEV() const { return Expr; }
3220};
3221
3222 /// Canonical scalar induction phi of the vector loop, starting at the specified
3223/// start value (either 0 or the resume value when vectorizing the epilogue
3224/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3225/// canonical induction variable.
3227public:
3229 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3230
3231 ~VPCanonicalIVPHIRecipe() override = default;
3232
3234 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3235 R->addOperand(getBackedgeValue());
3236 return R;
3237 }
3238
3239 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3240
3242 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
3243 }
3244
3245 void execute(VPTransformState &State) override {
3247 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3248 }
3249
3250#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3251 /// Print the recipe.
3252 void print(raw_ostream &O, const Twine &Indent,
3253 VPSlotTracker &SlotTracker) const override;
3254#endif
3255
3256 /// Returns the scalar type of the induction.
3258 return getStartValue()->getLiveInIRValue()->getType();
3259 }
3260
3261 /// Returns true if the recipe only uses the first lane of operand \p Op.
3262 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3264 "Op must be an operand of the recipe");
3265 return true;
3266 }
3267
3268 /// Returns true if the recipe only uses the first part of operand \p Op.
3269 bool onlyFirstPartUsed(const VPValue *Op) const override {
3271 "Op must be an operand of the recipe");
3272 return true;
3273 }
3274
3275 /// Return the cost of this VPCanonicalIVPHIRecipe.
3277 VPCostContext &Ctx) const override {
3278 // For now, match the behavior of the legacy cost model.
3279 return 0;
3280 }
3281};
3282
3283/// A recipe for generating the active lane mask for the vector loop that is
3284/// used to predicate the vector operations.
3285/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3286/// remove VPActiveLaneMaskPHIRecipe.
3288public:
3290 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3291 DL) {}
3292
3293 ~VPActiveLaneMaskPHIRecipe() override = default;
3294
3297 if (getNumOperands() == 2)
3298 R->addOperand(getOperand(1));
3299 return R;
3300 }
3301
3302 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3303
3305 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
3306 }
3307
3308 /// Generate the active lane mask phi of the vector loop.
3309 void execute(VPTransformState &State) override;
3310
3311#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3312 /// Print the recipe.
3313 void print(raw_ostream &O, const Twine &Indent,
3314 VPSlotTracker &SlotTracker) const override;
3315#endif
3316};
3317
3318/// A recipe for generating the phi node for the current index of elements,
3319/// adjusted in accordance with EVL value. It starts at the start value of the
3320/// canonical induction and gets incremented by EVL in each iteration of the
3321/// vector loop.
3323public:
3325 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3326
3327 ~VPEVLBasedIVPHIRecipe() override = default;
3328
3330 llvm_unreachable("cloning not implemented yet");
3331 }
3332
3333 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3334
3336 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
3337 }
3338
3339 void execute(VPTransformState &State) override {
3341 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3342 }
3343
3344 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3346 VPCostContext &Ctx) const override {
3347 // For now, match the behavior of the legacy cost model.
3348 return 0;
3349 }
3350
3351 /// Returns true if the recipe only uses the first lane of operand \p Op.
3352 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3354 "Op must be an operand of the recipe");
3355 return true;
3356 }
3357
3358#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3359 /// Print the recipe.
3360 void print(raw_ostream &O, const Twine &Indent,
3361 VPSlotTracker &SlotTracker) const override;
3362#endif
3363};
3364
3365/// A Recipe for widening the canonical induction variable of the vector loop.
3367 public VPUnrollPartAccessor<1> {
3368public:
3370 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3371
3372 ~VPWidenCanonicalIVRecipe() override = default;
3373
3375 return new VPWidenCanonicalIVRecipe(
3376 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3377 }
3378
3379 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3380
3381 /// Generate a canonical vector induction variable of the vector loop, with
3382 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3383 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3384 void execute(VPTransformState &State) override;
3385
3386 /// Return the cost of this VPWidenCanonicalIVRecipe.
3388 VPCostContext &Ctx) const override {
3389 // TODO: Compute accurate cost after retiring the legacy cost model.
3390 return 0;
3391 }
3392
3393#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3394 /// Print the recipe.
3395 void print(raw_ostream &O, const Twine &Indent,
3396 VPSlotTracker &SlotTracker) const override;
3397#endif
3398};
3399
3400 /// A recipe for converting the input value \p IV to the corresponding
3401/// value of an IV with different start and step values, using Start + IV *
3402/// Step.
3404 /// Kind of the induction.
3406 /// If not nullptr, the floating point induction binary operator. Must be set
3407 /// for floating point inductions.
3408 const FPMathOperator *FPBinOp;
3409
3410 /// Name to use for the generated IR instruction for the derived IV.
3411 std::string Name;
3412
3413public:
3415 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3416 const Twine &Name = "")
3418 IndDesc.getKind(),
3419 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3420 Start, CanonicalIV, Step, Name) {}
3421
3423 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3424 VPValue *Step, const Twine &Name = "")
3425 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3426 FPBinOp(FPBinOp), Name(Name.str()) {}
3427
3428 ~VPDerivedIVRecipe() override = default;
3429
3431 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3432 getStepValue());
3433 }
3434
3435 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3436
3437 /// Generate the transformed value of the induction at offset StartValue (1st
3438 /// operand) + IV (2nd operand) * StepValue (3rd operand).
3439 void execute(VPTransformState &State) override;
3440
3441 /// Return the cost of this VPDerivedIVRecipe.
3443 VPCostContext &Ctx) const override {
3444 // TODO: Compute accurate cost after retiring the legacy cost model.
3445 return 0;
3446 }
3447
3448#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3449 /// Print the recipe.
3450 void print(raw_ostream &O, const Twine &Indent,
3451 VPSlotTracker &SlotTracker) const override;
3452#endif
3453
3455 return getStartValue()->getLiveInIRValue()->getType();
3456 }
3457
/// Return the start value of the derived IV (operand 0).
3458 VPValue *getStartValue() const { return getOperand(0); }
/// Return the step value of the derived IV (operand 2).
3459 VPValue *getStepValue() const { return getOperand(2); }
3460
3461 /// Returns true if the recipe only uses the first lane of operand \p Op.
3462 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3464 "Op must be an operand of the recipe");
3465 return true;
3466 }
3467};
3468
3469/// A recipe for handling phi nodes of integer and floating-point inductions,
3470/// producing their scalar values.
3472 public VPUnrollPartAccessor<2> {
3473 Instruction::BinaryOps InductionOpcode;
3474
3475public:
3478 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3479 ArrayRef<VPValue *>({IV, Step}), FMFs),
3480 InductionOpcode(Opcode) {}
3481
3483 VPValue *Step)
3485 IV, Step, IndDesc.getInductionOpcode(),
3486 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3487 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3488 : FastMathFlags()) {}
3489
3490 ~VPScalarIVStepsRecipe() override = default;
3491
3493 return new VPScalarIVStepsRecipe(
3494 getOperand(0), getOperand(1), InductionOpcode,
3496 }
3497
3498 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3499
3500 /// Generate the scalarized versions of the phi node as needed by their users.
3501 void execute(VPTransformState &State) override;
3502
3503 /// Return the cost of this VPScalarIVStepsRecipe.
3505 VPCostContext &Ctx) const override {
3506 // TODO: Compute accurate cost after retiring the legacy cost model.
3507 return 0;
3508 }
3509
3510#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3511 /// Print the recipe.
3512 void print(raw_ostream &O, const Twine &Indent,
3513 VPSlotTracker &SlotTracker) const override;
3514#endif
3515
/// Return the step value (operand 1; operand 0 is the IV).
3516 VPValue *getStepValue() const { return getOperand(1); }
3517
3518 /// Returns true if the recipe only uses the first lane of operand \p Op.
3519 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3521 "Op must be an operand of the recipe");
3522 return true;
3523 }
3524};
3525
3526/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3527/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3528/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3530public:
3532
3533protected:
3534 /// The VPRecipes held in the order of output instructions to generate.
3536
/// Construct a block with the block subclass ID \p BlockSC and name \p Name.
/// Protected: intended for derived block classes that need their own ID.
3537 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3538 : VPBlockBase(BlockSC, Name.str()) {}
3539
3540public:
/// Construct a VPBasicBlock named \p Name. If \p Recipe is non-null it is
/// appended as the block's first recipe.
3541 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3542 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3543 if (Recipe)
3544 appendRecipe(Recipe);
3545 }
3546
/// Empty the recipe list by repeatedly popping its last recipe.
3547 ~VPBasicBlock() override {
3548 while (!Recipes.empty())
3549 Recipes.pop_back();
3550 }
3551
3552 /// Instruction iterators...
3557
3558 //===--------------------------------------------------------------------===//
3559 /// Recipe iterator methods
3560 ///
3561 inline iterator begin() { return Recipes.begin(); }
3562 inline const_iterator begin() const { return Recipes.begin(); }
3563 inline iterator end() { return Recipes.end(); }
3564 inline const_iterator end() const { return Recipes.end(); }
3565
3566 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3567 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3568 inline reverse_iterator rend() { return Recipes.rend(); }
3569 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3570
3571 inline size_t size() const { return Recipes.size(); }
3572 inline bool empty() const { return Recipes.empty(); }
3573 inline const VPRecipeBase &front() const { return Recipes.front(); }
3574 inline VPRecipeBase &front() { return Recipes.front(); }
3575 inline const VPRecipeBase &back() const { return Recipes.back(); }
3576 inline VPRecipeBase &back() { return Recipes.back(); }
3577
3578 /// Returns a reference to the list of recipes.
3580
3581 /// Returns a pointer to a member of the recipe list.
3583 return &VPBasicBlock::Recipes;
3584 }
3585
3586 /// Method to support type inquiry through isa, cast, and dyn_cast.
3587 static inline bool classof(const VPBlockBase *V) {
3588 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3589 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3590 }
3591
/// Insert \p Recipe into this block's recipe list at \p InsertPt and make
/// this block its parent. \p Recipe must be non-null and must not already
/// have a parent.
3592 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3593 assert(Recipe && "No recipe to append.");
3594 assert(!Recipe->Parent && "Recipe already in VPlan");
3595 Recipe->Parent = this;
3596 Recipes.insert(InsertPt, Recipe);
3597 }
3598
3599 /// Augment the existing recipes of a VPBasicBlock with an additional
3600 /// \p Recipe as the last recipe.
3601 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3602
3603 /// The method which generates the output IR instructions that correspond to
3604 /// this VPBasicBlock, thereby "executing" the VPlan.
3605 void execute(VPTransformState *State) override;
3606
3607 /// Return the cost of this VPBasicBlock.
3609
3610 /// Return the position of the first non-phi node recipe in the block.
3612
3613 /// Returns an iterator range over the PHI-like recipes in the block.
3615 return make_range(begin(), getFirstNonPhi());
3616 }
3617
3618 /// Split current block at \p SplitAt by inserting a new block between the
3619 /// current block and its successors and moving all recipes starting at
3620 /// SplitAt to the new block. Returns the new block.
3621 VPBasicBlock *splitAt(iterator SplitAt);
3622
3625
3626#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3627 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3628 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3629 ///
3630 /// Note that the numbering is applied to the whole VPlan, so printing
3631 /// individual blocks is consistent with the whole VPlan printing.
3632 void print(raw_ostream &O, const Twine &Indent,
3633 VPSlotTracker &SlotTracker) const override;
3634 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
3635#endif
3636
3637 /// If the block has multiple successors, return the branch recipe terminating
3638 /// the block. If there is no successor or only a single one, return nullptr.
3640 const VPRecipeBase *getTerminator() const;
3641
3642 /// Returns true if the block is exiting its parent region.
3643 bool isExiting() const;
3644
3645 /// Clone the current block and its recipes, without updating the operands of
3646 /// the cloned recipes.
3647 VPBasicBlock *clone() override;
3648
3649protected:
3650 /// Execute the recipes in the IR basic block \p BB.
3651 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3652
3653 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3654 /// generated for this VPBB.
3656
3657private:
3658 /// Create an IR BasicBlock to hold the output instructions generated by this
3659 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3660 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3661};
3662
3663/// A special type of VPBasicBlock that wraps an existing IR basic block.
3664/// Recipes of the block get added before the first non-phi instruction in the
3665/// wrapped block.
3666/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3667/// preheader block.
3669 BasicBlock *IRBB;
3670
3671public:
3673 : VPBasicBlock(VPIRBasicBlockSC,
3674 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3675 IRBB(IRBB) {}
3676
3677 ~VPIRBasicBlock() override {}
3678
3679 static inline bool classof(const VPBlockBase *V) {
3680 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3681 }
3682
3683 /// The method which generates the output IR instructions that correspond to
3684 /// this VPBasicBlock, thereby "executing" the VPlan.
3685 void execute(VPTransformState *State) override;
3686
3687 VPIRBasicBlock *clone() override;
3688
/// Return the IR basic block wrapped by this VPIRBasicBlock.
3689 BasicBlock *getIRBasicBlock() const { return IRBB; }
3690};
3691
3692/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3693/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3694/// A VPRegionBlock may indicate that its contents are to be replicated several
3695/// times. This is designed to support predicated scalarization, in which a
3696/// scalar if-then code structure needs to be generated VF * UF times. Having
3697/// this replication indicator helps to keep a single model for multiple
3698/// candidate VF's. The actual replication takes place only once the desired VF
3699/// and UF have been determined.
3701 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3702 VPBlockBase *Entry;
3703
3704 /// Hold the Single Exiting block of the SESE region modelled by the
3705 /// VPRegionBlock.
3706 VPBlockBase *Exiting;
3707
3708 /// An indicator whether this region is to generate multiple replicated
3709 /// instances of output IR corresponding to its VPBlockBases.
3710 bool IsReplicator;
3711
3712public:
3714 const std::string &Name = "", bool IsReplicator = false)
3715 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3716 IsReplicator(IsReplicator) {
3717 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3718 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3719 Entry->setParent(this);
3720 Exiting->setParent(this);
3721 }
/// Construct a region named \p Name without entry and exiting blocks (both
/// start out as nullptr). \p IsReplicator marks the region as a replicator.
3722 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3723 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3724 IsReplicator(IsReplicator) {}
3725
3726 ~VPRegionBlock() override {}
3727
3728 /// Method to support type inquiry through isa, cast, and dyn_cast.
3729 static inline bool classof(const VPBlockBase *V) {
3730 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3731 }
3732
/// Return the single entry block of the region (const and non-const
/// variants); may be nullptr if no entry has been set.
3733 const VPBlockBase *getEntry() const { return Entry; }
3734 VPBlockBase *getEntry() { return Entry; }
3735
3736 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3737 /// EntryBlock must have no predecessors.
3738 void setEntry(VPBlockBase *EntryBlock) {
3739 assert(EntryBlock->getPredecessors().empty() &&
3740 "Entry block cannot have predecessors.");
3741 Entry = EntryBlock;
3742 EntryBlock->setParent(this);
3743 }
3744
/// Return the single exiting block of the region (const and non-const
/// variants); may be nullptr if no exiting block has been set.
3745 const VPBlockBase *getExiting() const { return Exiting; }
3746 VPBlockBase *getExiting() { return Exiting; }
3747
3748 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3749 /// ExitingBlock must have no successors.
3750 void setExiting(VPBlockBase *ExitingBlock) {
3751 assert(ExitingBlock->getSuccessors().empty() &&
3752 "Exit block cannot have successors.");
3753 Exiting = ExitingBlock;
3754 ExitingBlock->setParent(this);
3755 }
3756
3757 /// Returns the pre-header VPBasicBlock of the loop region.
3759 assert(!isReplicator() && "should only get pre-header of loop regions");
3761 }
3762
3763 /// An indicator whether this region is to generate multiple replicated
3764 /// instances of output IR corresponding to its VPBlockBases.
3765 bool isReplicator() const { return IsReplicator; }
3766
3767 /// The method which generates the output IR instructions that correspond to
3768 /// this VPRegionBlock, thereby "executing" the VPlan.
3769 void execute(VPTransformState *State) override;
3770
3771 /// Return the cost of this region.
3773
3774#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3775 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3776 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3777 /// consecutive numbers.
3778 ///
3779 /// Note that the numbering is applied to the whole VPlan, so printing
3780 /// individual regions is consistent with the whole VPlan printing.
3781 void print(raw_ostream &O, const Twine &Indent,
3782 VPSlotTracker &SlotTracker) const override;
3783 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
3784#endif
3785
3786 /// Clone all blocks in the single-entry single-exit region of the block and
3787 /// their recipes without updating the operands of the cloned recipes.
3788 VPRegionBlock *clone() override;
3789};
3790
3791 /// VPlan models a candidate for vectorization, encoding various decisions taken
3792/// to produce efficient output IR, including which branches, basic-blocks and
3793/// output IR instructions to generate, and their cost. VPlan holds a
3794/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3795/// VPBasicBlock.
3796class VPlan {
3797 friend class VPlanPrinter;
3798 friend class VPSlotTracker;
3799
3800 /// VPBasicBlock corresponding to the original preheader. Used to place
3801 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3802 /// rest of VPlan execution.
3803 /// When this VPlan is used for the epilogue vector loop, the entry will be
3804 /// replaced by a new entry block created during skeleton creation.
3805 VPBasicBlock *Entry;
3806
3807 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3808 VPIRBasicBlock *ScalarHeader;
3809
3810 /// Holds the VFs applicable to this VPlan.
3812
3813 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3814 /// any UF.
3816
3817 /// Holds the name of the VPlan, for printing.
3818 std::string Name;
3819
3820 /// Represents the trip count of the original loop, for folding
3821 /// the tail.
3822 VPValue *TripCount = nullptr;
3823
3824 /// Represents the backedge taken count of the original loop, for folding
3825 /// the tail. It equals TripCount - 1.
3826 VPValue *BackedgeTakenCount = nullptr;
3827
3828 /// Represents the vector trip count.
3829 VPValue VectorTripCount;
3830
3831 /// Represents the vectorization factor of the loop.
3832 VPValue VF;
3833
3834 /// Represents the loop-invariant VF * UF of the vector loop region.
3835 VPValue VFxUF;
3836
3837 /// Holds a mapping between Values and their corresponding VPValue inside
3838 /// VPlan.
3839 Value2VPValueTy Value2VPValue;
3840
3841 /// Contains all the external definitions created for this VPlan. External
3842 /// definitions are VPValues that hold a pointer to their underlying IR.
3843 SmallVector<VPValue *, 16> VPLiveInsToFree;
3844
3845 /// Mapping from SCEVs to the VPValues representing their expansions.
3846 /// NOTE: This mapping is temporary and will be removed once all users have
3847 /// been modeled in VPlan directly.
3848 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3849
3850 /// Blocks allocated and owned by the VPlan. They will be deleted once the
3851 /// VPlan is destroyed.
3852 SmallVector<VPBlockBase *> CreatedBlocks;
3853
3854 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3855 /// wrapping the original header of the scalar loop.
3856 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3857 : Entry(Entry), ScalarHeader(ScalarHeader) {
3858 Entry->setPlan(this);
3859 assert(ScalarHeader->getNumSuccessors() == 0 &&
3860 "scalar header must be a leaf node");
3861 }
3862
3863public:
3864 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
3865 /// original preheader and scalar header of \p L, to be used as entry and
3866 /// scalar header blocks of the new VPlan.
3867 VPlan(Loop *L);
3868
3869 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
3870 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
3871 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
3872 setEntry(createVPBasicBlock("preheader"));
3873 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
3874 TripCount = TC;
3875 }
3876
3877 ~VPlan();
3878
3880 Entry = VPBB;
3881 VPBB->setPlan(this);
3882 }
3883
3884 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3885 /// original scalar pre-header) which contains SCEV expansions that need
3886 /// to happen before the CFG is modified (when executing a VPlan for the
3887 /// epilogue vector loop, the original entry needs to be replaced by a new
3888 /// one); a VPBasicBlock for the vector pre-header, followed by a region for
3889 /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
3890 /// to guard executing the scalar epilogue loop, it will be added to the
3891 /// middle block, together with VPBasicBlocks for the scalar preheader and
3892 /// exit blocks. \p InductionTy is the type of the canonical induction and
3893 /// used for related values, like the trip count expression.
3894 static VPlanPtr createInitialVPlan(Type *InductionTy,
3896 bool RequiresScalarEpilogueCheck,
3897 bool TailFolded, Loop *TheLoop);
3898
3899 /// Prepare the plan for execution, setting up the required live-in values.
3900 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3901 VPTransformState &State);
3902
3903 /// Generate the IR code for this VPlan.
3904 void execute(VPTransformState *State);
3905
3906 /// Return the cost of this plan.
3908
/// Return the entry block of the plan (non-const and const variants).
3909 VPBasicBlock *getEntry() { return Entry; }
3910 const VPBasicBlock *getEntry() const { return Entry; }
3911
3912 /// Returns the preheader of the vector loop region, if one exists, or null
3913 /// otherwise.
3915 VPRegionBlock *VectorRegion = getVectorLoopRegion();
3916 return VectorRegion
3917 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
3918 : nullptr;
3919 }
3920
3921 /// Returns the VPRegionBlock of the vector loop.
3923 const VPRegionBlock *getVectorLoopRegion() const;
3924
3925 /// Returns the 'middle' block of the plan, that is the block that selects
3926 /// whether to execute the scalar tail loop or the exit block from the loop
3927 /// latch.
3929 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3930 }
3932 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3933 }
3934
3935 /// Return the VPBasicBlock for the preheader of the scalar loop.
3937 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
3938 }
3939
3940 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3941 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3942
3943 /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
3944 /// of the VPlan, that is leaf nodes except the scalar header. Defined in
3945 /// VPlanHCFG, as the definition of the type needs access to the definitions
3946 /// of VPBlockShallowTraversalWrapper.
3947 auto getExitBlocks();
3948
3949 /// The trip count of the original loop.
3951 assert(TripCount && "trip count needs to be set before accessing it");
3952 return TripCount;
3953 }
3954
3955 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3956 /// the original trip count have been replaced.
3957 void resetTripCount(VPValue *NewTripCount) {
3958 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3959 "TripCount always must be set");
3960 TripCount = NewTripCount;
3961 }
3962
3963 /// The backedge taken count of the original loop.
3965 if (!BackedgeTakenCount)
3966 BackedgeTakenCount = new VPValue();
3967 return BackedgeTakenCount;
3968 }
3969
3970 /// The vector trip count.
3971 VPValue &getVectorTripCount() { return VectorTripCount; }
3972
3973 /// Returns the VF of the vector loop region.
3974 VPValue &getVF() { return VF; };
3975
3976 /// Returns VF * UF of the vector loop region.
3977 VPValue &getVFxUF() { return VFxUF; }
3978
3979 void addVF(ElementCount VF) { VFs.insert(VF); }
3980
3982 assert(hasVF(VF) && "Cannot set VF not already in plan");
3983 VFs.clear();
3984 VFs.insert(VF);
3985 }
3986
3987 bool hasVF(ElementCount VF) { return VFs.count(VF); }
3989 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3990 }
3991
3992 /// Returns an iterator range over all VFs of the plan.
3995 return {VFs.begin(), VFs.end()};
3996 }
3997
3998 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3999
4000 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
4001
4002 unsigned getUF() const {
4003 assert(UFs.size() == 1 && "Expected a single UF");
4004 return UFs[0];
4005 }
4006
4007 void setUF(unsigned UF) {
4008 assert(hasUF(UF) && "Cannot set the UF not already in plan");
4009 UFs.clear();
4010 UFs.insert(UF);
4011 }
4012
4013 /// Return a string with the name of the plan and the applicable VFs and UFs.
4014 std::string getName() const;
4015
4016 void setName(const Twine &newName) { Name = newName.str(); }
4017
4018 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
4019 /// yet) for \p V.
4021 assert(V && "Trying to get or add the VPValue of a null Value");
4022 if (!Value2VPValue.count(V)) {
4023 VPValue *VPV = new VPValue(V);
4024 VPLiveInsToFree.push_back(VPV);
4025 assert(VPV->isLiveIn() && "VPV must be a live-in.");
4026 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
4027 Value2VPValue[V] = VPV;
4028 }
4029
4030 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
4031 assert(Value2VPValue[V]->isLiveIn() &&
4032 "Only live-ins should be in mapping");
4033 return Value2VPValue[V];
4034 }
4035
4036 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
4037 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
4038
4039#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4040 /// Print the live-ins of this VPlan to \p O.
4041 void printLiveIns(raw_ostream &O) const;
4042
4043 /// Print this VPlan to \p O.
4044 void print(raw_ostream &O) const;
4045
4046 /// Print this VPlan in DOT format to \p O.
4047 void printDOT(raw_ostream &O) const;
4048
4049 /// Dump the plan to stderr (for debugging).
4050 LLVM_DUMP_METHOD void dump() const;
4051#endif
4052
4053 /// Returns the canonical induction recipe of the vector loop.
4056 if (EntryVPBB->empty()) {
4057 // VPlan native path.
4058 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4059 }
4060 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4061 }
4062
4063 VPValue *getSCEVExpansion(const SCEV *S) const {
4064 return SCEVToExpansion.lookup(S);
4065 }
4066
4067 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4068 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4069 SCEVToExpansion[S] = V;
4070 }
4071
4072 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4073 /// recipes to refer to the clones, and return it.
4074 VPlan *duplicate();
4075
4076 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4077 /// present. The returned block is owned by the VPlan and deleted once the
4078 /// VPlan is destroyed.
4080 VPRecipeBase *Recipe = nullptr) {
4081 auto *VPB = new VPBasicBlock(Name, Recipe);
4082 CreatedBlocks.push_back(VPB);
4083 return VPB;
4084 }
4085
4086 /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
4087 /// IsReplicator is true, the region is a replicate region. The returned block
4088 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4090 const std::string &Name = "",
4091 bool IsReplicator = false) {
4092 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
4093 CreatedBlocks.push_back(VPB);
4094 return VPB;
4095 }
4096
4097 /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set
4098 /// to nullptr. If \p IsReplicator is true, the region is a replicate region.
4099 /// The returned block is owned by the VPlan and deleted once the VPlan is
4100 /// destroyed.
4101 VPRegionBlock *createVPRegionBlock(const std::string &Name = "",
4102 bool IsReplicator = false) {
4103 auto *VPB = new VPRegionBlock(Name, IsReplicator);
4104 CreatedBlocks.push_back(VPB);
4105 return VPB;
4106 }
4107
4108 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4109 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4110 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4112
4113 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4114 /// instructions in \p IRBB, except its terminator which is managed by the
4115 /// successors of the block in VPlan. The returned block is owned by the VPlan
4116 /// and deleted once the VPlan is destroyed.
4118};
4119
4120#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4121/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
4122/// indented and follows the dot format.
4124 raw_ostream &OS;
4125 const VPlan &Plan;
4126 unsigned Depth = 0;
4127 unsigned TabWidth = 2;
4128 std::string Indent;
4129 unsigned BID = 0;
4131
4133
4134 /// Handle indentation.
4135 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
4136
4137 /// Print a given \p Block of the Plan.
4138 void dumpBlock(const VPBlockBase *Block);
4139
4140 /// Print the information related to the CFG edges going out of a given
4141 /// \p Block, followed by printing the successor blocks themselves.
4142 void dumpEdges(const VPBlockBase *Block);
4143
4144 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
4145 /// its successor blocks.
4146 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
4147
4148 /// Print a given \p Region of the Plan.
4149 void dumpRegion(const VPRegionBlock *Region);
4150
4151 unsigned getOrCreateBID(const VPBlockBase *Block) {
4152 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
4153 }
4154
4155 Twine getOrCreateName(const VPBlockBase *Block);
4156
4157 Twine getUID(const VPBlockBase *Block);
4158
4159 /// Print the information related to a CFG edge between two VPBlockBases.
4160 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
4161 const Twine &Label);
4162
4163public:
4165 : OS(O), Plan(P), SlotTracker(&P) {}
4166
4167 LLVM_DUMP_METHOD void dump();
4168};
4169
4171 const Value *V;
4172
4173 VPlanIngredient(const Value *V) : V(V) {}
4174
4175 void print(raw_ostream &O) const;
4176};
4177
4179 I.print(OS);
4180 return OS;
4181}
4182
4184 Plan.print(OS);
4185 return OS;
4186}
4187#endif
4188
4189//===----------------------------------------------------------------------===//
4190// VPlan Utilities
4191//===----------------------------------------------------------------------===//
4192
4193/// Class that provides utilities for VPBlockBases in VPlan.
4195public:
4196 VPBlockUtils() = delete;
4197
4198 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
4199 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
4200 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
4201 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
4202 /// have neither successors nor predecessors.
4203 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4204 assert(NewBlock->getSuccessors().empty() &&
4205 NewBlock->getPredecessors().empty() &&
4206 "Can't insert new block with predecessors or successors.");
4207 NewBlock->setParent(BlockPtr->getParent());
4208 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
4209 for (VPBlockBase *Succ : Succs) {
4210 disconnectBlocks(BlockPtr, Succ);
4211 connectBlocks(NewBlock, Succ);
4212 }
4213 connectBlocks(BlockPtr, NewBlock);
4214 }
4215
4216 /// Insert disconnected block \p NewBlock before \p Blockptr. First
4217 /// disconnects all predecessors of \p BlockPtr and connects them to \p
4218 /// NewBlock. Add \p NewBlock as predecessor of \p BlockPtr and \p BlockPtr as
4219 /// successor of \p NewBlock.
4220 static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4221 assert(NewBlock->getSuccessors().empty() &&
4222 NewBlock->getPredecessors().empty() &&
4223 "Can't insert new block with predecessors or successors.");
4224 NewBlock->setParent(BlockPtr->getParent());
4225 for (VPBlockBase *Pred : to_vector(BlockPtr->predecessors())) {
4226 disconnectBlocks(Pred, BlockPtr);
4227 connectBlocks(Pred, NewBlock);
4228 }
4229 connectBlocks(NewBlock, BlockPtr);
4230 }
4231
4232 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
4233 /// BlockPtr. Add \p IfTrue and \p IfFalse as succesors of \p BlockPtr and \p
4234 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
4235 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
4236 /// and \p IfTrue and \p IfFalse must have neither successors nor
4237 /// predecessors.
4238 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
4239 VPBlockBase *BlockPtr) {
4240 assert(IfTrue->getSuccessors().empty() &&
4241 "Can't insert IfTrue with successors.");
4242 assert(IfFalse->getSuccessors().empty() &&
4243 "Can't insert IfFalse with successors.");
4244 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
4245 IfTrue->setPredecessors({BlockPtr});
4246 IfFalse->setPredecessors({BlockPtr});
4247 IfTrue->setParent(BlockPtr->getParent());
4248 IfFalse->setParent(BlockPtr->getParent());
4249 }
4250
4251 /// Connect VPBlockBases \p From and \p To bi-directionally. If \p PredIdx is
4252 /// -1, append \p From to the predecessors of \p To, otherwise set \p To's
4253 /// predecessor at \p PredIdx to \p From. If \p SuccIdx is -1, append \p To to
4254 /// the successors of \p From, otherwise set \p From's successor at \p SuccIdx
4255 /// to \p To. Both VPBlockBases must have the same parent, which can be null.
4256 /// Both VPBlockBases can be already connected to other VPBlockBases.
4258 unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
4259 assert((From->getParent() == To->getParent()) &&
4260 "Can't connect two block with different parents");
4261 assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
4262 "Blocks can't have more than two successors.");
4263 if (SuccIdx == -1u)
4264 From->appendSuccessor(To);
4265 else
4266 From->getSuccessors()[SuccIdx] = To;
4267
4268 if (PredIdx == -1u)
4269 To->appendPredecessor(From);
4270 else
4271 To->getPredecessors()[PredIdx] = From;
4272 }
4273
4274 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
4275 /// from the successors of \p From and \p From from the predecessors of \p To.
4277 assert(To && "Successor to disconnect is null.");
4278 From->removeSuccessor(To);
4279 To->removePredecessor(From);
4280 }
4281
4282 /// Reassociate all the blocks connected to \p Old so that they now point to
4283 /// \p New.
4285 for (auto *Pred : to_vector(Old->getPredecessors()))
4286 Pred->replaceSuccessor(Old, New);
4287 for (auto *Succ : to_vector(Old->getSuccessors()))
4288 Succ->replacePredecessor(Old, New);
4289 New->setPredecessors(Old->getPredecessors());
4290 New->setSuccessors(Old->getSuccessors());
4291 Old->clearPredecessors();
4292 Old->clearSuccessors();
4293 }
4294
4295 /// Return an iterator range over \p Range which only includes \p BlockTy
4296 /// blocks. The accesses are cast to \p BlockTy.
4297 template <typename BlockTy, typename T>
4298 static auto blocksOnly(const T &Range) {
4299 // Create BaseTy with correct const-ness based on BlockTy.
4300 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
4301 const VPBlockBase, VPBlockBase>;
4302
4303 // We need to first create an iterator range over (const) BlockTy & instead
4304 // of (const) BlockTy * for filter_range to work properly.
4305 auto Mapped =
4306 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
4308 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
4309 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
4310 return cast<BlockTy>(&Block);
4311 });
4312 }
4313
4314 /// Inserts \p BlockPtr on the edge between \p From and \p To. That is, update
4315 /// \p From's successor to \p To to point to \p BlockPtr and \p To's
4316 /// predecessor from \p From to \p BlockPtr. \p From and \p To are added to \p
4317 /// BlockPtr's predecessors and successors respectively. There must be a
4318 /// single edge between \p From and \p To.
4320 VPBlockBase *BlockPtr) {
4321 auto &Successors = From->getSuccessors();
4322 auto &Predecessors = To->getPredecessors();
4323 assert(count(Successors, To) == 1 && count(Predecessors, From) == 1 &&
4324 "must have single between From and To");
4325 unsigned SuccIdx = std::distance(Successors.begin(), find(Successors, To));
4326 unsigned PredIx =
4327 std::distance(Predecessors.begin(), find(Predecessors, From));
4328 VPBlockUtils::connectBlocks(From, BlockPtr, -1, SuccIdx);
4329 VPBlockUtils::connectBlocks(BlockPtr, To, PredIx, -1);
4330 }
4331};
4332
4335 InterleaveGroupMap;
4336
4337 /// Type for mapping of instruction based interleave groups to VPInstruction
4338 /// interleave groups
4341
4342 /// Recursively traverse \p Region and populate VPlan based interleave groups based on
4343 /// \p IAI.
4344 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
4346 /// Recursively traverse \p Block and populate VPlan based interleave groups
4347 /// based on \p IAI.
4348 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
4350
4351public:
4353
4356 // Avoid releasing a pointer twice.
4357 for (auto &I : InterleaveGroupMap)
4358 DelSet.insert(I.second);
4359 for (auto *Ptr : DelSet)
4360 delete Ptr;
4361 }
4362
4363 /// Get the interleave group that \p Instr belongs to.
4364 ///
4365 /// \returns nullptr if doesn't have such group.
4368 return InterleaveGroupMap.lookup(Instr);
4369 }
4370};
4371
4372/// Class that maps (parts of) an existing VPlan to trees of combined
4373/// VPInstructions.
4375 enum class OpMode { Failed, Load, Opcode };
4376
4377 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
4378 /// DenseMap keys.
4379 struct BundleDenseMapInfo {
4380 static SmallVector<VPValue *, 4> getEmptyKey() {
4381 return {reinterpret_cast<VPValue *>(-1)};
4382 }
4383
4384 static SmallVector<VPValue *, 4> getTombstoneKey() {
4385 return {reinterpret_cast<VPValue *>(-2)};
4386 }
4387
4388 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
4389 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
4390 }
4391
4392 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
4394 return LHS == RHS;
4395 }
4396 };
4397
4398 /// Mapping of values in the original VPlan to a combined VPInstruction.
4400 BundleToCombined;
4401
4403
4404 /// Basic block to operate on. For now, only instructions in a single BB are
4405 /// considered.
4406 const VPBasicBlock &BB;
4407
4408 /// Indicates whether we managed to combine all visited instructions or not.
4409 bool CompletelySLP = true;
4410
4411 /// Width of the widest combined bundle in bits.
4412 unsigned WidestBundleBits = 0;
4413
4414 using MultiNodeOpTy =
4415 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
4416
4417 // Input operand bundles for the current multi node. Each multi node operand
4418 // bundle contains values not matching the multi node's opcode. They will
4419 // be reordered in reorderMultiNodeOps, once we completed building a
4420 // multi node.
4421 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
4422
4423 /// Indicates whether we are building a multi node currently.
4424 bool MultiNodeActive = false;
4425
4426 /// Check if we can vectorize Operands together.
4427 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
4428
4429 /// Add combined instruction \p New for the bundle \p Operands.
4430 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
4431
4432 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
4433 VPInstruction *markFailed();
4434
4435 /// Reorder operands in the multi node to maximize sequential memory access
4436 /// and commutative operations.
4437 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
4438
4439 /// Choose the best candidate to use for the lane after \p Last. The set of
4440 /// candidates to choose from are values with an opcode matching \p Last's
4441 /// or loads consecutive to \p Last.
4442 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
4443 SmallPtrSetImpl<VPValue *> &Candidates,
4445
4446#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4447 /// Print bundle \p Values to dbgs().
4448 void dumpBundle(ArrayRef<VPValue *> Values);
4449#endif
4450
4451public:
4452 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
4453
4454 ~VPlanSlp() = default;
4455
4456 /// Tries to build an SLP tree rooted at \p Operands and returns a
4457 /// VPInstruction combining \p Operands, if they can be combined.
4459
4460 /// Return the width of the widest combined bundle in bits.
4461 unsigned getWidestBundleBits() const { return WidestBundleBits; }
4462
4463 /// Return true if all visited instructions can be combined.
4464 bool isCompletelySLP() const { return CompletelySLP; }
4465};
4466} // end namespace llvm
4467
4468#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:410
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:819
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:488
uint32_t getFactor() const
Definition: VectorUtils.h:504
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:558
InstTy * getInsertPos() const
Definition: VectorUtils.h:574
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:630
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition: ModRef.h:198
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:195
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:121
ElementCount operator*() const
Definition: VPlan.h:129
iterator & operator++()
Definition: VPlan.h:131
iterator(ElementCount VF)
Definition: VPlan.h:125
bool operator==(const iterator &Other) const
Definition: VPlan.h:127
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3287
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3295
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3304
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:3289
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3529
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3554
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3601
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3556
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3553
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition: VPlan.cpp:415
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:480
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3579
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:3537
iterator end()
Definition: VPlan.h:3563
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:3541
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3561
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3555
VPBasicBlock * clone() override
Clone the current block and it's recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:517
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3614
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:758
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:208
~VPBasicBlock() override
Definition: VPlan.h:3547
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:566
const_reverse_iterator rbegin() const
Definition: VPlan.h:3567
reverse_iterator rend()
Definition: VPlan.h:3568
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:536
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:3535
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:524
VPRecipeBase & back()
Definition: VPlan.h:3576
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:631
const VPRecipeBase & front() const
Definition: VPlan.h:3573
const_iterator begin() const
Definition: VPlan.h:3562
VPRecipeBase & front()
Definition: VPlan.h:3574
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:614
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:602
const VPRecipeBase & back() const
Definition: VPlan.h:3575
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3592
bool empty() const
Definition: VPlan.h:3572
const_iterator end() const
Definition: VPlan.h:3564
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3587
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3582
reverse_iterator rbegin()
Definition: VPlan.h:3566
size_t size() const
Definition: VPlan.h:3571
const_reverse_iterator rend() const
Definition: VPlan.h:3569
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2487
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2493
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBlendRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2540
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2516
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2521
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2511
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2498
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2507
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:397
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:613
VPRegionBlock * getParent()
Definition: VPlan.h:489
VPBlocksTy & getPredecessors()
Definition: VPlan.h:521
iterator_range< VPBlockBase ** > predecessors()
Definition: VPlan.h:518
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:672
void setName(const Twine &newName)
Definition: VPlan.h:482
size_t getNumSuccessors() const
Definition: VPlan.h:535
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:517
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:628
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:619
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:641
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:662
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:571
size_t getNumPredecessors() const
Definition: VPlan.h:536
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:604
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:200
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:520
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition: VPlan.h:474
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:153
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:172
const VPRegionBlock * getParent() const
Definition: VPlan.h:490
const std::string & getName() const
Definition: VPlan.h:480
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:623
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:561
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:595
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:531
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:555
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:620
unsigned getVPBlockID() const
Definition: VPlan.h:487
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:648
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:466
VPBlocksTy & getSuccessors()
Definition: VPlan.h:515
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:192
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:584
void setParent(VPRegionBlock *P)
Definition: VPlan.h:500
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:577
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:525
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:514
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:4194
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:4298
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:4203
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition: VPlan.h:4319
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:4238
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4257
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4276
static void reassociateBlocks(VPBlockBase *Old, VPBlockBase *New)
Reassociate all the blocks connected to Old so that they now point to New.
Definition: VPlan.h:4284
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before BlockPtr.
Definition: VPlan.h:4220
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2847
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2883
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2871
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2849
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2855
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2890
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3226
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:3269
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3241
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3233
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:3228
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3262
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3257
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3245
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition: VPlan.h:3276
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
unsigned getVPDefID() const
Definition: VPlanValue.h:420
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3403
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1st operand) + IV (2nd operand) * StepValue (3rd operand).
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition: VPlan.h:3442
VPValue * getStepValue() const
Definition: VPlan.h:3459
Type * getScalarType() const
Definition: VPlan.h:3454
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3430
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3422
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3462
VPValue * getStartValue() const
Definition: VPlan.h:3458
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3414
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3322
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3335
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3329
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3339
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition: VPlan.h:3345
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:3324
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3352
Recipe to expand a SCEV expression.
Definition: VPlan.h:3187
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:3192
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition: VPlan.h:3207
const SCEV * getSCEV() const
Definition: VPlan.h:3219
void execute(VPTransformState &State) override
Generate the IR that expands the SCEV expression.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3197
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:2026
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
static bool classof(const VPValue *V)
Definition: VPlan.h:2043
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:2028
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2074
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2063
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:2071
VPValue * getStartValue() const
Definition: VPlan.h:2066
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2039
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:2080
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition: VPlan.h:1775
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1787
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPHistogramRecipe(unsigned Opcode, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:1781
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1804
unsigned getOpcode() const
Definition: VPlan.h:1800
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3668
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:451
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:3672
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3689
~VPIRBasicBlock() override
Definition: VPlan.h:3677
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3679
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:473
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
Definition: VPlan.h:1380
Instruction & getInstruction() const
Definition: VPlan.h:1404
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition: VPlan.h:1418
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1391
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition: VPlan.h:1412
VPIRInstruction(Instruction &I)
Definition: VPlan.h:1384
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1192
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1292
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1267
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1303
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1210
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1198
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1213
@ CalculateTripCountMinusVF
Definition: VPlan.h:1211
bool hasResult() const
Definition: VPlan.h:1333
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1373
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition: VPlan.h:1318
unsigned getOpcode() const
Definition: VPlan.h:1310
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1279
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1272
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1284
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2554
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2637
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2595
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2566
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2601
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2587
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2608
Instruction * getInsertPos() const
Definition: VPlan.h:2643
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2628
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2632
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:4367
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:229
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:73
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:176
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:210
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:213
VPLane(unsigned Lane)
Definition: VPlan.h:175
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:200
static VPLane getFirstLane()
Definition: VPlan.h:178
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:156
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:216
A recipe for forming partial reductions.
Definition: VPlan.h:2444
~VPPartialReductionRecipe() override=default
void execute(VPTransformState &State) override
Generate the reduction in the loop.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPartialReductionRecipe.
unsigned getOpcode() const
Get the binary op's opcode.
Definition: VPlan.h:2476
VPPartialReductionRecipe(unsigned Opcode, VPValue *Op0, VPValue *Op1, Instruction *ReductionInst=nullptr)
Definition: VPlan.h:2452
VPPartialReductionRecipe(Instruction *ReductionInst, VPValue *Op0, VPValue *Op1)
Definition: VPlan.h:2448
VPPartialReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2462
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2902
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2934
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2910
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition: VPlan.h:2921
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2906
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:714
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:803
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:739
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:808
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:781
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:725
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:740
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:786
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:730
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:792
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flags for a recipe along with it.
Definition: VPlan.h:925
ExactFlagsTy ExactFlags
Definition: VPlan.h:975
FastMathFlagsTy FMFs
Definition: VPlan.h:978
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:977
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:972
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1145
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1027
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1106
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPNoWrapFlags GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1052
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1058
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1039
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1075
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1148
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:997
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:974
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1033
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1045
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:976
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:983
WrapFlagsTy WrapFlags
Definition: VPlan.h:973
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1152
bool isDisjoint() const
Definition: VPlan.h:1164
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1139
bool hasNoSignedWrap() const
Definition: VPlan.h:1158
static bool classof(const VPUser *U)
Definition: VPlan.h:1069
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:990
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2729
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2759
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2756
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
Definition: VPlan.h:2731
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2740
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:2378
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2434
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2406
~VPReductionPHIRecipe() override=default
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false, unsigned VFScaleFactor=1)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:2395
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2437
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2416
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2429
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2649
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2714
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2684
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL={})
Definition: VPlan.h:2669
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition: VPlan.h:2657
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2718
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2708
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2720
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2712
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2716
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2678
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2689
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3700
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:702
const VPBlockBase * getEntry() const
Definition: VPlan.h:3733
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3765
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3750
VPBlockBase * getExiting()
Definition: VPlan.h:3746
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3738
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:765
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:803
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3722
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3713
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:711
const VPBlockBase * getExiting() const
Definition: VPlan.h:3745
VPBlockBase * getEntry()
Definition: VPlan.h:3734
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3758
~VPRegionBlock() override
Definition: VPlan.h:3726
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3729
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2770
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2819
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2826
bool isUniform() const
Definition: VPlan.h:2814
bool isPredicated() const
Definition: VPlan.h:2816
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2789
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2779
unsigned getOpcode() const
Definition: VPlan.h:2843
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2838
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
A recipe to compute the pointers for widened memory accesses of IndexTy in reverse order.
Definition: VPlan.h:1903
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReverseVectorPointerRecipe.
Definition: VPlan.h:1927
VPReverseVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1941
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1934
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1920
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1907
const VPValue * getVFValue() const
Definition: VPlan.h:1916
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1579
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarCastRecipe.
Definition: VPlan.h:1604
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1594
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1618
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
Definition: VPlan.h:1587
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1616
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3472
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3519
VPValue * getStepValue() const
Definition: VPlan.h:3516
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition: VPlan.h:3504
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:3482
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3492
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:3476
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Recipe to generate a scalar PHI.
Definition: VPlan.h:2262
VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, StringRef Name)
Definition: VPlan.h:2266
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2285
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPScalarPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPScalarPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2275
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:841
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:847
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:911
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:856
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:914
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:844
static bool classof(const VPUser *U)
Definition: VPlan.h:903
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:852
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:441
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:1177
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
operand_range operands()
Definition: VPlanValue.h:257
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:242
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_end()
Definition: VPlanValue.h:255
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:231
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1417
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:132
A recipe to compute the pointers for widened memory accesses of IndexTy.
Definition: VPlan.h:1956
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1960
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1977
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1970
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition: VPlan.h:1990
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1984
A recipe for widening Call instructions using library calls.
Definition: VPlan.h:1719
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1759
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1738
Function * getCalledScalarFunction() const
Definition: VPlan.h:1752
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1756
~VPWidenCallRecipe() override=default
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL={})
Definition: VPlan.h:1726
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3367
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition: VPlan.h:3387
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3374
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:3369
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1527
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1535
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1572
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1575
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1543
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1549
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1480
const VPValue * getEVL() const
Definition: VPlan.h:1504
~VPWidenEVLRecipe() override=default
VPWidenEVLRecipe(Instruction &I, iterator_range< IterT > Operands, VPValue &EVL)
Definition: VPlan.h:1485
VPWidenRecipe * clone() override final
Clone the current recipe.
Definition: VPlan.h:1496
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC)
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
Definition: VPlan.h:1489
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1503
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1511
A recipe for handling GEP instructions.
Definition: VPlan.h:1854
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition: VPlan.h:1887
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1876
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1871
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2088
static bool classof(const VPValue *V)
Definition: VPlan.h:2104
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2124
PHINode * getPHINode() const
Definition: VPlan.h:2119
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2092
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2116
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2122
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:2131
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2099
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2109
const VPValue * getStepValue() const
Definition: VPlan.h:2117
virtual void execute(VPTransformState &State) override=0
Generate the phi nodes.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2141
const TruncInst * getTruncInst() const
Definition: VPlan.h:2195
const VPValue * getVFValue() const
Definition: VPlan.h:2184
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition: VPlan.h:2154
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2165
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2194
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2145
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition: VPlan.h:2210
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2203
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition: VPlan.h:1627
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, std::initializer_list< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1668
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition: VPlan.h:1692
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition: VPlan.h:1701
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1653
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition: VPlan.h:1707
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1676
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition: VPlan.h:1704
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1695
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1644
A common base class for widening memory operations.
Definition: VPlan.h:2943
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2954
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2951
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2990
static bool classof(const VPUser *U)
Definition: VPlan.h:2984
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:3010
Instruction & Ingredient
Definition: VPlan.h:2945
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2973
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition: VPlan.h:3018
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2948
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2977
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2964
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:3004
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:3000
void setMask(VPValue *Mask)
Definition: VPlan.h:2956
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2997
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2994
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:2301
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:2331
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:2340
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:2307
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2313
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:2337
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2231
~VPWidenPointerInductionRecipe() override=default
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:2222
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2248
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1429
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1445
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1440
unsigned getOpcode() const
Definition: VPlan.h:1469
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1434
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1451
static bool classof(const VPUser *U)
Definition: VPlan.h:1456
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:4123
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:4164
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1274
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:4374
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:4464
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:4452
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:4461
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3796
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1145
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1121
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:924
bool hasScalableVF()
Definition: VPlan.h:3988
VPBasicBlock * getEntry()
Definition: VPlan.h:3909
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:4089
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3971
void setName(const Twine &newName)
Definition: VPlan.h:4016
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3977
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3974
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3950
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3964
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3994
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition: VPlan.h:3871
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3910
unsigned getUF() const
Definition: VPlan.h:4002
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
Definition: VPlan.cpp:845
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition: VPlan.cpp:1246
bool hasVF(ElementCount VF)
Definition: VPlan.h:3987
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:4067
bool hasUF(unsigned UF) const
Definition: VPlan.h:4000
void setVF(ElementCount VF)
Definition: VPlan.h:3981
VPRegionBlock * createVPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Name and entry and exiting blocks set to nullptr.
Definition: VPlan.h:4101
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:309
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1052
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1046
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3928
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3957
VPBasicBlock * getMiddleBlock()
Definition: VPlan.h:3931
void setEntry(VPBasicBlock *VPBB)
Definition: VPlan.h:3879
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:4079
VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1252
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:4020
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1151
bool hasScalarVFOnly() const
Definition: VPlan.h:3998
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:3936
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:956
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4054
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1104
void addVF(ElementCount VF)
Definition: VPlan.h:3979
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:3941
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:4037
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:4063
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1068
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:3914
void setUF(unsigned UF)
Definition: VPlan.h:4007
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1192
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:144
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:573
@ Other
Any other memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:92
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:97
iterator end()
Definition: VPlan.h:138
const ElementCount Start
Definition: VPlan.h:99
ElementCount End
Definition: VPlan.h:102
iterator begin()
Definition: VPlan.h:137
bool isEmpty() const
Definition: VPlan.h:104
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:108
Struct to hold various analyses needed for cost computations.
Definition: VPlan.h:682
LLVMContext & LLVMCtx
Definition: VPlan.h:686
LoopVectorizationCostModel & CM
Definition: VPlan.h:687
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1665
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:685
const TargetLibraryInfo & TLI
Definition: VPlan.h:684
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, Type *CanIVTy, LoopVectorizationCostModel &CM)
Definition: VPlan.h:690
const TargetTransformInfo & TTI
Definition: VPlan.h:683
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:688
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2346
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2356
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:2347
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2352
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:942
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:338
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:344
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:352
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:340
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:348
CFGState(DominatorTree *DT)
Definition: VPlan.h:357
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:355
DenseMap< VPValue *, Value * > VPV2Vector
Definition: VPlan.h:255
DenseMap< VPValue *, SmallVector< Value *, 4 > > VPV2Scalars
Definition: VPlan.h:257
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:269
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:267
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:366
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:389
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:392
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:394
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:385
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:353
void reset(VPValue *Def, Value *V, const VPLane &Lane)
Reset an existing scalar value for Def and a given Lane.
Definition: VPlan.h:306
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:250
void set(VPValue *Def, Value *V, const VPLane &Lane)
Set the generated scalar V for Def and the given Lane.
Definition: VPlan.h:296
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:369
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:242
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:375
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:372
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def.
Definition: VPlan.h:290
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:245
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:279
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:3062
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3074
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3090
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3063
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:3023
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3024
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3050
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3032
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1816
bool isInvariantCond() const
Definition: VPlan.h:1848
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1824
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1818
VPValue * getCond() const
Definition: VPlan.h:1844
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:3142
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3153
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3172
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3143
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3156
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:3101
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3130
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3102
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3118
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3109
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:4173
const Value * V
Definition: VPlan.h:4171
void print(raw_ostream &O) const
Definition: VPlan.cpp:1390