LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/Twine.h"
35#include "llvm/ADT/ilist.h"
36#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
/// Reciprocal of the probability with which a predicated block is assumed to
/// execute. A result of X means the predicated block is modelled as running
/// once per X iterations of the loop header.
///
/// TODO: Use the actual block probability when it is available. For now every
/// predicated block is assumed to execute 50% of the time.
inline unsigned getReciprocalPredBlockProb() {
  constexpr unsigned ReciprocalProb = 2;
  return ReciprocalProb;
}
93
94/// A range of powers-of-2 vectorization factors with fixed start and
95/// adjustable end. The range includes start and excludes end, e.g.,:
96/// [1, 16) = {1, 2, 4, 8}
97struct VFRange {
98 // A power of 2.
100
101 // A power of 2. If End <= Start range is empty.
103
104 bool isEmpty() const {
106 }
107
109 : Start(Start), End(End) {
111 "Both Start and End should have the same scalable flag");
113 "Expected Start to be a power of 2");
115 "Expected End to be a power of 2");
116 }
117
118 /// Iterator to iterate over vectorization factors in a VFRange.
120 : public iterator_facade_base<iterator, std::forward_iterator_tag,
121 ElementCount> {
122 ElementCount VF;
123
124 public:
125 iterator(ElementCount VF) : VF(VF) {}
126
127 bool operator==(const iterator &Other) const { return VF == Other.VF; }
128
129 ElementCount operator*() const { return VF; }
130
132 VF *= 2;
133 return *this;
134 }
135 };
136
140 return iterator(End);
141 }
142};
143
144using VPlanPtr = std::unique_ptr<VPlan>;
145
146/// In what follows, the term "input IR" refers to code that is fed into the
147/// vectorizer whereas the term "output IR" refers to code that is generated by
148/// the vectorizer.
149
150/// VPLane provides a way to access lanes in both fixed width and scalable
151/// vectors, where for the latter the lane index sometimes needs calculating
152/// as a runtime expression.
153class VPLane {
154public:
155 /// Kind describes how to interpret Lane.
156 enum class Kind : uint8_t {
157 /// For First, Lane is the index into the first N elements of a
158 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
159 First,
160 /// For ScalableLast, Lane is the offset from the start of the last
161 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
162 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
163 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
165 };
166
167private:
168 /// in [0..VF)
169 unsigned Lane;
170
171 /// Indicates how the Lane should be interpreted, as described above.
172 Kind LaneKind;
173
174public:
175 VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
176 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
177
179
180 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
181 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
182 "trying to extract with invalid offset");
183 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
184 Kind LaneKind;
185 if (VF.isScalable())
186 // In this case 'LaneOffset' refers to the offset from the start of the
187 // last subvector with VF.getKnownMinValue() elements.
189 else
190 LaneKind = VPLane::Kind::First;
191 return VPLane(LaneOffset, LaneKind);
192 }
193
195 return getLaneFromEnd(VF, 1);
196 }
197
198 /// Returns a compile-time known value for the lane index and asserts if the
199 /// lane can only be calculated at runtime.
200 unsigned getKnownLane() const {
201 assert(LaneKind == Kind::First);
202 return Lane;
203 }
204
205 /// Returns an expression describing the lane index that can be used at
206 /// runtime.
207 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
208
209 /// Returns the Kind of lane offset.
210 Kind getKind() const { return LaneKind; }
211
212 /// Returns true if this is the first lane of the whole vector.
213 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
214
215 /// Maps the lane to a cache index based on \p VF.
216 unsigned mapToCacheIndex(const ElementCount &VF) const {
217 switch (LaneKind) {
219 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
220 return VF.getKnownMinValue() + Lane;
221 default:
222 assert(Lane < VF.getKnownMinValue());
223 return Lane;
224 }
225 }
226
227 /// Returns the maximum number of lanes that we are able to consider
228 /// caching for \p VF.
 /// This is VF.getKnownMinValue() for a fixed \p VF and twice that for a
 /// scalable \p VF.
229 static unsigned getNumCachedLanes(const ElementCount &VF) {
230 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
231 }
232};
233
234/// VPTransformState holds information passed down when "executing" a VPlan,
235/// needed for generating the output IR.
240 Loop *CurrentParentLoop, Type *CanonicalIVTy);
241 /// Target Transform Info.
243
244 /// The chosen Vectorization Factor of the loop being vectorized.
246
247 /// Hold the index to generate specific scalar instructions. Null indicates
248 /// that all instances are to be generated, using either scalar or vector
249 /// instructions.
250 std::optional<VPLane> Lane;
251
252 struct DataState {
253 // Each value from the original loop, when vectorized, is represented by a
254 // vector value in the map.
256
259
260 /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
261 /// is false, otherwise return the generated scalar. \see set.
262 Value *get(VPValue *Def, bool IsScalar = false);
263
264 /// Get the generated Value for a given VPValue \p Def and the given \p Lane.
265 Value *get(VPValue *Def, const VPLane &Lane);
266
267 bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
268
270 auto I = Data.VPV2Scalars.find(Def);
271 if (I == Data.VPV2Scalars.end())
272 return false;
273 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
274 return CacheIdx < I->second.size() && I->second[CacheIdx];
275 }
276
277 /// Set the generated vector Value for a given VPValue, if \p
278 /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
279 void set(VPValue *Def, Value *V, bool IsScalar = false) {
280 if (IsScalar) {
281 set(Def, V, VPLane(0));
282 return;
283 }
284 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
285 "scalar values must be stored as (0, 0)");
286 Data.VPV2Vector[Def] = V;
287 }
288
289 /// Reset the existing vector value for \p Def to \p V.
290 void reset(VPValue *Def, Value *V) {
291 assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
292 Data.VPV2Vector[Def] = V;
293 }
294
295 /// Set the generated scalar \p V for \p Def and the given \p Lane.
296 void set(VPValue *Def, Value *V, const VPLane &Lane) {
297 auto &Scalars = Data.VPV2Scalars[Def];
298 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
299 if (Scalars.size() <= CacheIdx)
300 Scalars.resize(CacheIdx + 1);
301 assert(!Scalars[CacheIdx] && "should overwrite existing value");
302 Scalars[CacheIdx] = V;
303 }
304
305 /// Reset an existing scalar value for \p Def and a given \p Lane.
306 void reset(VPValue *Def, Value *V, const VPLane &Lane) {
307 auto Iter = Data.VPV2Scalars.find(Def);
308 assert(Iter != Data.VPV2Scalars.end() &&
309 "need to overwrite existing value");
310 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
311 assert(CacheIdx < Iter->second.size() &&
312 "need to overwrite existing value");
313 Iter->second[CacheIdx] = V;
314 }
315
316 /// Add additional metadata to \p To that was not present on \p Orig.
317 ///
318 /// Currently this is used to add the noalias annotations based on the
319 /// inserted memchecks. Use this for instructions that are *cloned* into the
320 /// vector loop.
321 void addNewMetadata(Instruction *To, const Instruction *Orig);
322
323 /// Add metadata from one instruction to another.
324 ///
325 /// This includes both the original MDs from \p From and additional ones (\see
326 /// addNewMetadata). Use this for *newly created* instructions in the vector
327 /// loop.
328 void addMetadata(Value *To, Instruction *From);
329
330 /// Set the debug location in the builder using the debug location \p DL.
332
333 /// Construct the vector value of a scalarized value \p Def one lane at a time.
334 void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
335
336 /// Hold state information used when constructing the CFG of the output IR,
337 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
338 struct CFGState {
339 /// The previous VPBasicBlock visited. Initially set to null.
341
342 /// The previous IR BasicBlock created or used. Initially set to the new
343 /// header BasicBlock.
344 BasicBlock *PrevBB = nullptr;
345
346 /// The last IR BasicBlock in the output IR. Set to the exit block of the
347 /// vector loop.
348 BasicBlock *ExitBB = nullptr;
349
350 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
351 /// of replication, maps the BasicBlock of the last replica created.
353
354 /// Updater for the DominatorTree.
356
358 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
359
360 /// Returns the BasicBlock* mapped to the pre-header of the loop region
361 /// containing \p R.
364
365 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
367
368 /// Hold a reference to the IRBuilder used to generate output IR code.
370
371 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
373
374 /// Pointer to the VPlan code is generated for.
376
377 /// The parent loop object for the current scope, or nullptr.
379
380 /// LoopVersioning. It's only set up (non-null) if memchecks were
381 /// used.
382 ///
383 /// This is currently only used to add no-alias metadata based on the
384 /// memchecks. The actual versioning is performed manually.
386
387 /// Map SCEVs to their expanded values. Populated when executing
388 /// VPExpandSCEVRecipes.
390
391 /// VPlan-based type analysis.
393};
394
395/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
396/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
398 friend class VPBlockUtils;
399
400 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
401
402 /// An optional name for the block.
403 std::string Name;
404
405 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
406 /// it is a topmost VPBlockBase.
407 VPRegionBlock *Parent = nullptr;
408
409 /// List of predecessor blocks.
411
412 /// List of successor blocks.
414
415 /// VPlan containing the block. Can only be set on the entry block of the
416 /// plan.
417 VPlan *Plan = nullptr;
418
419 /// Add \p Successor as the last successor to this block.
420 void appendSuccessor(VPBlockBase *Successor) {
421 assert(Successor && "Cannot add nullptr successor!");
422 Successors.push_back(Successor);
423 }
424
425 /// Add \p Predecessor as the last predecessor to this block.
426 void appendPredecessor(VPBlockBase *Predecessor) {
427 assert(Predecessor && "Cannot add nullptr predecessor!");
428 Predecessors.push_back(Predecessor);
429 }
430
431 /// Remove \p Predecessor from the predecessors of this block.
432 void removePredecessor(VPBlockBase *Predecessor) {
433 auto Pos = find(Predecessors, Predecessor);
434 assert(Pos && "Predecessor does not exist");
435 Predecessors.erase(Pos);
436 }
437
438 /// Remove \p Successor from the successors of this block.
439 void removeSuccessor(VPBlockBase *Successor) {
440 auto Pos = find(Successors, Successor);
441 assert(Pos && "Successor does not exist");
442 Successors.erase(Pos);
443 }
444
445 /// This function replaces one predecessor with another, useful when
446 /// trying to replace an old block in the CFG with a new one.
447 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
448 auto I = find(Predecessors, Old);
449 assert(I != Predecessors.end());
450 assert(Old->getParent() == New->getParent() &&
451 "replaced predecessor must have the same parent");
452 *I = New;
453 }
454
455 /// This function replaces one successor with another, useful when
456 /// trying to replace an old block in the CFG with a new one.
457 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
458 auto I = find(Successors, Old);
459 assert(I != Successors.end());
460 assert(Old->getParent() == New->getParent() &&
461 "replaced successor must have the same parent");
462 *I = New;
463 }
464
465protected:
466 VPBlockBase(const unsigned char SC, const std::string &N)
467 : SubclassID(SC), Name(N) {}
468
469public:
470 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
471 /// that are actually instantiated. Values of this enumeration are kept in the
472 /// SubclassID field of the VPBlockBase objects. They are used for concrete
473 /// type identification.
474 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
475
477
478 virtual ~VPBlockBase() = default;
479
480 const std::string &getName() const { return Name; }
481
482 void setName(const Twine &newName) { Name = newName.str(); }
483
484 /// \return an ID for the concrete type of this object.
485 /// This is used to implement the classof checks. This should not be used
486 /// for any other purpose, as the values may change as LLVM evolves.
487 unsigned getVPBlockID() const { return SubclassID; }
488
489 VPRegionBlock *getParent() { return Parent; }
490 const VPRegionBlock *getParent() const { return Parent; }
491
492 /// \return A pointer to the plan containing the current block.
493 VPlan *getPlan();
494 const VPlan *getPlan() const;
495
496 /// Sets the pointer of the plan containing the block. The block must be the
497 /// entry block into the VPlan.
498 void setPlan(VPlan *ParentPlan);
499
500 void setParent(VPRegionBlock *P) { Parent = P; }
501
502 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
503 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
504 /// VPBlockBase is a VPBasicBlock, it is returned.
505 const VPBasicBlock *getEntryBasicBlock() const;
507
508 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
509 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
510 /// VPBlockBase is a VPBasicBlock, it is returned.
511 const VPBasicBlock *getExitingBasicBlock() const;
513
514 const VPBlocksTy &getSuccessors() const { return Successors; }
515 VPBlocksTy &getSuccessors() { return Successors; }
516
519
520 const VPBlocksTy &getPredecessors() const { return Predecessors; }
521 VPBlocksTy &getPredecessors() { return Predecessors; }
522
523 /// \return the successor of this VPBlockBase if it has a single successor.
524 /// Otherwise return a null pointer.
526 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
527 }
528
529 /// \return the predecessor of this VPBlockBase if it has a single
530 /// predecessor. Otherwise return a null pointer.
532 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
533 }
534
535 size_t getNumSuccessors() const { return Successors.size(); }
536 size_t getNumPredecessors() const { return Predecessors.size(); }
537
538 /// An Enclosing Block of a block B is any block containing B, including B
539 /// itself. \return the closest enclosing block starting from "this", which
540 /// has successors. \return the root enclosing block if all enclosing blocks
541 /// have no successors.
543
544 /// \return the closest enclosing block starting from "this", which has
545 /// predecessors. \return the root enclosing block if all enclosing blocks
546 /// have no predecessors.
548
549 /// \return the successors either attached directly to this VPBlockBase or, if
550 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
551 /// successors of its own, search recursively for the first enclosing
552 /// VPRegionBlock that has successors and return them. If no such
553 /// VPRegionBlock exists, return the (empty) successors of the topmost
554 /// VPBlockBase reached.
557 }
558
559 /// \return the hierarchical successor of this VPBlockBase if it has a single
560 /// hierarchical successor. Otherwise return a null pointer.
563 }
564
565 /// \return the predecessors either attached directly to this VPBlockBase or,
566 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
567 /// predecessors of its own, search recursively for the first enclosing
568 /// VPRegionBlock that has predecessors and return them. If no such
569 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
570 /// VPBlockBase reached.
573 }
574
575 /// \return the hierarchical predecessor of this VPBlockBase if it has a
576 /// single hierarchical predecessor. Otherwise return a null pointer.
579 }
580
581 /// Set a given VPBlockBase \p Successor as the single successor of this
582 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
583 /// This VPBlockBase must have no successors.
585 assert(Successors.empty() && "Setting one successor when others exist.");
586 assert(Successor->getParent() == getParent() &&
587 "connected blocks must have the same parent");
588 appendSuccessor(Successor);
589 }
590
591 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
592 /// successors of this VPBlockBase. This VPBlockBase is not added as
593 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
594 /// successors.
595 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
596 assert(Successors.empty() && "Setting two successors when others exist.");
597 appendSuccessor(IfTrue);
598 appendSuccessor(IfFalse);
599 }
600
601 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
602 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
603 /// as successor of any VPBasicBlock in \p NewPreds.
605 assert(Predecessors.empty() && "Block predecessors already set.");
606 for (auto *Pred : NewPreds)
607 appendPredecessor(Pred);
608 }
609
610 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
611 /// This VPBlockBase must have no successors. This VPBlockBase is not added
612 /// as predecessor of any VPBasicBlock in \p NewSuccs.
614 assert(Successors.empty() && "Block successors already set.");
615 for (auto *Succ : NewSuccs)
616 appendSuccessor(Succ);
617 }
618
619 /// Remove all the predecessors of this block.
620 void clearPredecessors() { Predecessors.clear(); }
621
622 /// Remove all the successors of this block.
623 void clearSuccessors() { Successors.clear(); }
624
625 /// Swap successors of the block. The block must have exactly 2 successors.
626 // TODO: This should be part of introducing conditional branch recipes rather
627 // than being independent.
629 assert(Successors.size() == 2 && "must have 2 successors to swap");
630 std::swap(Successors[0], Successors[1]);
631 }
632
633 /// The method which generates the output IR that correspond to this
634 /// VPBlockBase, thereby "executing" the VPlan.
635 virtual void execute(VPTransformState *State) = 0;
636
637 /// Return the cost of the block.
639
640 /// Return true if it is legal to hoist instructions into this block.
642 // There are currently no constraints that prevent an instruction from
643 // being hoisted into a VPBlockBase.
644 return true;
645 }
646
647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
648 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
649 OS << getName();
650 }
651
652 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
653 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
654 /// consecutive numbers.
655 ///
656 /// Note that the numbering is applied to the whole VPlan, so printing
657 /// individual blocks is consistent with the whole VPlan printing.
658 virtual void print(raw_ostream &O, const Twine &Indent,
659 VPSlotTracker &SlotTracker) const = 0;
660
661 /// Print plain-text dump of this VPBlockBase to \p O.
662 void print(raw_ostream &O) const {
664 print(O, "", SlotTracker);
665 }
666
667 /// Print the successors of this block to \p O, prefixing all lines with \p
668 /// Indent.
669 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
670
671 /// Dump this VPBlockBase to dbgs().
672 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
673#endif
674
675 /// Clone the current block and its recipes without updating the operands of
676 /// the cloned recipes, including all blocks in the single-entry single-exit
677 /// region for VPRegionBlocks.
678 virtual VPBlockBase *clone() = 0;
679};
680
681/// Struct to hold various analysis needed for cost computations.
689
692 : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
693 CM(CM) {}
694
695 /// Return the cost for \p UI with \p VF using the legacy cost model as
696 /// fallback until computing the cost of all recipes migrates to VPlan.
698
699 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
700 /// has already been pre-computed.
701 bool skipCostComputation(Instruction *UI, bool IsVector) const;
702
703 /// Returns the OperandInfo for \p V, if it is a live-in.
705};
706
707/// VPRecipeBase is a base class modeling a sequence of one or more output IR
708/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
709/// and is responsible for deleting its defined values. Single-value
710/// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
711/// VPRecipeBase and VPValue separately.
712class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
713 public VPDef,
714 public VPUser {
715 friend VPBasicBlock;
716 friend class VPBlockUtils;
717
718 /// Each VPRecipe belongs to a single VPBasicBlock.
719 VPBasicBlock *Parent = nullptr;
720
721 /// The debug location for the recipe.
722 DebugLoc DL;
723
724public:
726 DebugLoc DL = {})
727 : VPDef(SC), VPUser(Operands), DL(DL) {}
728
729 template <typename IterT>
731 DebugLoc DL = {})
732 : VPDef(SC), VPUser(Operands), DL(DL) {}
733 virtual ~VPRecipeBase() = default;
734
735 /// Clone the current recipe.
736 virtual VPRecipeBase *clone() = 0;
737
738 /// \return the VPBasicBlock which this VPRecipe belongs to.
739 VPBasicBlock *getParent() { return Parent; }
740 const VPBasicBlock *getParent() const { return Parent; }
741
742 /// The method which generates the output IR instructions that correspond to
743 /// this VPRecipe, thereby "executing" the VPlan.
744 virtual void execute(VPTransformState &State) = 0;
745
746 /// Return the cost of this recipe, taking into account if the cost
747 /// computation should be skipped and the ForceTargetInstructionCost flag.
748 /// Also takes care of printing the cost for debugging.
750
751 /// Insert an unlinked recipe into a basic block immediately before
752 /// the specified recipe.
753 void insertBefore(VPRecipeBase *InsertPos);
754 /// Insert an unlinked recipe into \p BB immediately before the insertion
755 /// point \p IP;
757
758 /// Insert an unlinked Recipe into a basic block immediately after
759 /// the specified Recipe.
760 void insertAfter(VPRecipeBase *InsertPos);
761
762 /// Unlink this recipe from its current VPBasicBlock and insert it into
763 /// the VPBasicBlock that MovePos lives in, right after MovePos.
764 void moveAfter(VPRecipeBase *MovePos);
765
766 /// Unlink this recipe and insert into BB before I.
767 ///
768 /// \pre I is a valid iterator into BB.
770
771 /// This method unlinks 'this' from the containing basic block, but does not
772 /// delete it.
773 void removeFromParent();
774
775 /// This method unlinks 'this' from the containing basic block and deletes it.
776 ///
777 /// \returns an iterator pointing to the element after the erased one
779
780 /// Method to support type inquiry through isa, cast, and dyn_cast.
781 static inline bool classof(const VPDef *D) {
782 // All VPDefs are also VPRecipeBases.
783 return true;
784 }
785
786 static inline bool classof(const VPUser *U) { return true; }
787
788 /// Returns true if the recipe may have side-effects.
789 bool mayHaveSideEffects() const;
790
791 /// Returns true for PHI-like recipes.
792 bool isPhi() const {
793 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
794 }
795
796 /// Returns true if the recipe may read from memory.
797 bool mayReadFromMemory() const;
798
799 /// Returns true if the recipe may write to memory.
800 bool mayWriteToMemory() const;
801
802 /// Returns true if the recipe may read from or write to memory.
803 bool mayReadOrWriteMemory() const {
805 }
806
807 /// Returns the debug location of the recipe.
808 DebugLoc getDebugLoc() const { return DL; }
809
810protected:
811 /// Compute the cost of this recipe either using a recipe's specialized
812 /// implementation or using the legacy cost model and the underlying
813 /// instructions.
815 VPCostContext &Ctx) const;
816};
817
818// Helper macro to define common classof implementations for recipes.
819#define VP_CLASSOF_IMPL(VPDefID) \
820 static inline bool classof(const VPDef *D) { \
821 return D->getVPDefID() == VPDefID; \
822 } \
823 static inline bool classof(const VPValue *V) { \
824 auto *R = V->getDefiningRecipe(); \
825 return R && R->getVPDefID() == VPDefID; \
826 } \
827 static inline bool classof(const VPUser *U) { \
828 auto *R = dyn_cast<VPRecipeBase>(U); \
829 return R && R->getVPDefID() == VPDefID; \
830 } \
831 static inline bool classof(const VPRecipeBase *R) { \
832 return R->getVPDefID() == VPDefID; \
833 } \
834 static inline bool classof(const VPSingleDefRecipe *R) { \
835 return R->getVPDefID() == VPDefID; \
836 }
837
838/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
839/// more output IR instructions that define a single result VPValue.
840/// Note that VPRecipeBase must be inherited from before VPValue.
841class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
842public:
843 template <typename IterT>
844 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
845 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
846
847 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
848 DebugLoc DL = {})
849 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
850
851 template <typename IterT>
852 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
853 DebugLoc DL = {})
854 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
855
856 static inline bool classof(const VPRecipeBase *R) {
857 switch (R->getVPDefID()) {
858 case VPRecipeBase::VPDerivedIVSC:
859 case VPRecipeBase::VPEVLBasedIVPHISC:
860 case VPRecipeBase::VPExpandSCEVSC:
861 case VPRecipeBase::VPInstructionSC:
862 case VPRecipeBase::VPReductionEVLSC:
863 case VPRecipeBase::VPReductionSC:
864 case VPRecipeBase::VPReplicateSC:
865 case VPRecipeBase::VPScalarIVStepsSC:
866 case VPRecipeBase::VPVectorPointerSC:
867 case VPRecipeBase::VPReverseVectorPointerSC:
868 case VPRecipeBase::VPWidenCallSC:
869 case VPRecipeBase::VPWidenCanonicalIVSC:
870 case VPRecipeBase::VPWidenCastSC:
871 case VPRecipeBase::VPWidenGEPSC:
872 case VPRecipeBase::VPWidenIntrinsicSC:
873 case VPRecipeBase::VPWidenSC:
874 case VPRecipeBase::VPWidenEVLSC:
875 case VPRecipeBase::VPWidenSelectSC:
876 case VPRecipeBase::VPBlendSC:
877 case VPRecipeBase::VPPredInstPHISC:
878 case VPRecipeBase::VPCanonicalIVPHISC:
879 case VPRecipeBase::VPActiveLaneMaskPHISC:
880 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
881 case VPRecipeBase::VPWidenPHISC:
882 case VPRecipeBase::VPWidenIntOrFpInductionSC:
883 case VPRecipeBase::VPWidenPointerInductionSC:
884 case VPRecipeBase::VPReductionPHISC:
885 case VPRecipeBase::VPScalarCastSC:
886 return true;
887 case VPRecipeBase::VPBranchOnMaskSC:
888 case VPRecipeBase::VPInterleaveSC:
889 case VPRecipeBase::VPIRInstructionSC:
890 case VPRecipeBase::VPWidenLoadEVLSC:
891 case VPRecipeBase::VPWidenLoadSC:
892 case VPRecipeBase::VPWidenStoreEVLSC:
893 case VPRecipeBase::VPWidenStoreSC:
894 case VPRecipeBase::VPHistogramSC:
895 // TODO: Widened stores don't define a value, but widened loads do. Split
896 // the recipes to be able to make widened loads VPSingleDefRecipes.
897 return false;
898 }
899 llvm_unreachable("Unhandled VPDefID");
900 }
901
902 static inline bool classof(const VPUser *U) {
903 auto *R = dyn_cast<VPRecipeBase>(U);
904 return R && classof(R);
905 }
906
907 virtual VPSingleDefRecipe *clone() override = 0;
908
909 /// Returns the underlying instruction.
911 return cast<Instruction>(getUnderlyingValue());
912 }
914 return cast<Instruction>(getUnderlyingValue());
915 }
916
917#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
918 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
919 LLVM_DUMP_METHOD void dump() const;
920#endif
921};
922
923/// Class to record LLVM IR flags for a recipe along with it.
925 enum class OperationType : unsigned char {
926 Cmp,
927 OverflowingBinOp,
928 DisjointOp,
929 PossiblyExactOp,
930 GEPOp,
931 FPMathOp,
932 NonNegOp,
933 Other
934 };
935
936public:
937 struct WrapFlagsTy {
938 char HasNUW : 1;
939 char HasNSW : 1;
940
942 };
943
945 char IsDisjoint : 1;
947 };
948
949private:
950 struct ExactFlagsTy {
951 char IsExact : 1;
952 };
953 struct NonNegFlagsTy {
954 char NonNeg : 1;
955 };
956 struct FastMathFlagsTy {
957 char AllowReassoc : 1;
958 char NoNaNs : 1;
959 char NoInfs : 1;
960 char NoSignedZeros : 1;
961 char AllowReciprocal : 1;
962 char AllowContract : 1;
963 char ApproxFunc : 1;
964
965 FastMathFlagsTy(const FastMathFlags &FMF);
966 };
967
968 OperationType OpType;
969
970 union {
974 ExactFlagsTy ExactFlags;
976 NonNegFlagsTy NonNegFlags;
977 FastMathFlagsTy FMFs;
978 unsigned AllFlags;
979 };
980
981protected:
983 OpType = Other.OpType;
984 AllFlags = Other.AllFlags;
985 }
986
987public:
/// Create a recipe with VPDefID \p SC, operands \p Operands and debug location
/// \p DL that records no IR flags: the operation type is Other and all flag
/// bits are cleared.
988 template <typename IterT>
989 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
990 : VPSingleDefRecipe(SC, Operands, DL) {
991 OpType = OperationType::Other;
992 AllFlags = 0;
993 }
994
995 template <typename IterT>
996 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
998 if (auto *Op = dyn_cast<CmpInst>(&I)) {
999 OpType = OperationType::Cmp;
1000 CmpPredicate = Op->getPredicate();
1001 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1002 OpType = OperationType::DisjointOp;
1003 DisjointFlags.IsDisjoint = Op->isDisjoint();
1004 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1005 OpType = OperationType::OverflowingBinOp;
1006 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1007 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1008 OpType = OperationType::PossiblyExactOp;
1009 ExactFlags.IsExact = Op->isExact();
1010 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1011 OpType = OperationType::GEPOp;
1012 GEPFlags = GEP->getNoWrapFlags();
1013 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1014 OpType = OperationType::NonNegOp;
1015 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1016 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1017 OpType = OperationType::FPMathOp;
1018 FMFs = Op->getFastMathFlags();
1019 } else {
1020 OpType = OperationType::Other;
1021 AllFlags = 0;
1022 }
1023 }
1024
1025 template <typename IterT>
1026 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1027 CmpInst::Predicate Pred, DebugLoc DL = {})
1028 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1029 CmpPredicate(Pred) {}
1030
1031 template <typename IterT>
1032 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1034 : VPSingleDefRecipe(SC, Operands, DL),
1035 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1036
1037 template <typename IterT>
1038 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1039 FastMathFlags FMFs, DebugLoc DL = {})
1040 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1041 FMFs(FMFs) {}
1042
1043 template <typename IterT>
1044 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1046 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1048
1049protected:
1050 template <typename IterT>
1051 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1053 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1054 GEPFlags(GEPFlags) {}
1055
1056public:
/// Method to support type inquiry through isa, cast, and dyn_cast.
/// VPWidenCallSC and VPWidenIntrinsicSC are listed as well, because
/// VPWidenCallRecipe and VPWidenIntrinsicRecipe initialize
/// VPRecipeWithIRFlags as their base class.
1057 static inline bool classof(const VPRecipeBase *R) {
1058 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1059 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1060 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
1061 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
1062 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
1063 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1064 R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
1065 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1066 }
1067
1068 static inline bool classof(const VPUser *U) {
1069 auto *R = dyn_cast<VPRecipeBase>(U);
1070 return R && classof(R);
1071 }
1072
1073 /// Drop all poison-generating flags.
1075 // NOTE: This needs to be kept in-sync with
1076 // Instruction::dropPoisonGeneratingFlags.
1077 switch (OpType) {
1078 case OperationType::OverflowingBinOp:
1079 WrapFlags.HasNUW = false;
1080 WrapFlags.HasNSW = false;
1081 break;
1082 case OperationType::DisjointOp:
1083 DisjointFlags.IsDisjoint = false;
1084 break;
1085 case OperationType::PossiblyExactOp:
1086 ExactFlags.IsExact = false;
1087 break;
1088 case OperationType::GEPOp:
1090 break;
1091 case OperationType::FPMathOp:
1092 FMFs.NoNaNs = false;
1093 FMFs.NoInfs = false;
1094 break;
1095 case OperationType::NonNegOp:
1096 NonNegFlags.NonNeg = false;
1097 break;
1098 case OperationType::Cmp:
1099 case OperationType::Other:
1100 break;
1101 }
1102 }
1103
1104 /// Set the IR flags for \p I, choosing the setters to call based on OpType.
/// Only the flag group that matches the recorded operation type is applied.
1105 void setFlags(Instruction *I) const {
1106 switch (OpType) {
1107 case OperationType::OverflowingBinOp:
1108 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1109 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1110 break;
1111 case OperationType::DisjointOp:
// cast<> requires \p I to be a PossiblyDisjointInst here.
1112 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1113 break;
1114 case OperationType::PossiblyExactOp:
1115 I->setIsExact(ExactFlags.IsExact);
1116 break;
1117 case OperationType::GEPOp:
// cast<> requires \p I to be a GetElementPtrInst here.
1118 cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
1119 break;
1120 case OperationType::FPMathOp:
// Each recorded fast-math bit is written individually to \p I.
1121 I->setHasAllowReassoc(FMFs.AllowReassoc);
1122 I->setHasNoNaNs(FMFs.NoNaNs);
1123 I->setHasNoInfs(FMFs.NoInfs);
1124 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1125 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1126 I->setHasAllowContract(FMFs.AllowContract);
1127 I->setHasApproxFunc(FMFs.ApproxFunc);
1128 break;
1129 case OperationType::NonNegOp:
1130 I->setNonNeg(NonNegFlags.NonNeg);
1131 break;
// Nothing is set on \p I for Cmp and Other.
1132 case OperationType::Cmp:
1133 case OperationType::Other:
1134 break;
1135 }
1136 }
1137
1139 assert(OpType == OperationType::Cmp &&
1140 "recipe doesn't have a compare predicate");
1141 return CmpPredicate;
1142 }
1143
1145
1146 /// Returns true if the recipe has fast-math flags.
1147 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1148
1150
/// Returns the recorded no-unsigned-wrap (NUW) flag. Asserts that the recipe's
/// operation type is OverflowingBinOp.
1151 bool hasNoUnsignedWrap() const {
1152 assert(OpType == OperationType::OverflowingBinOp &&
1153 "recipe doesn't have a NUW flag");
1154 return WrapFlags.HasNUW;
1155 }
1156
/// Returns the recorded no-signed-wrap (NSW) flag. Asserts that the recipe's
/// operation type is OverflowingBinOp.
1157 bool hasNoSignedWrap() const {
1158 assert(OpType == OperationType::OverflowingBinOp &&
1159 "recipe doesn't have a NSW flag");
1160 return WrapFlags.HasNSW;
1161 }
1162
1163 bool isDisjoint() const {
1164 assert(OpType == OperationType::DisjointOp &&
1165 "recipe cannot have a disjoing flag");
1167 }
1168
1169#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1170 void printFlags(raw_ostream &O) const;
1171#endif
1172};
1173
1174/// Helper to access the operand that contains the unroll part for this recipe
1175/// after unrolling.
1176template <unsigned PartOpIdx> class VPUnrollPartAccessor {
1177protected:
1178 /// Return the VPValue operand containing the unroll part or null if there is
1179 /// no such operand.
1181
1182 /// Return the unroll part.
1183 unsigned getUnrollPart(VPUser &U) const;
1184};
1185
1186/// This is a concrete Recipe that models a single VPlan-level instruction.
1187/// While as any Recipe it may generate a sequence of IR instructions when
1188/// executed, these instructions would always form a single-def expression as
1189/// the VPInstruction is also a single def-use vertex.
1191 public VPUnrollPartAccessor<1> {
1192 friend class VPlanSlp;
1193
1194public:
1195 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1196 enum {
1198 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1199 // values of a first-order recurrence.
1205 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1206 /// The first operand is the incoming value from the predecessor in VPlan,
1207 /// the second operand is the incoming value for all other predecessors
1208 /// (which are currently not modeled in VPlan).
1211 // Increment the canonical IV separately for each unrolled part.
1216 // Takes the VPValue to extract from as first operand and the lane or part
1217 // to extract as second operand, counting from the end starting with 1 for
1218 // last. The second operand must be a positive constant and <= VF.
1220 LogicalAnd, // Non-poison propagating logical And.
1221 // Add an offset in bytes (second operand) to a base pointer (first
1222 // operand). Only generates scalar values (either for the first lane only or
1223 // for all lanes, depending on its uses).
1225 // Returns a scalar boolean value, which is true if any lane of its single
1226 // operand is true.
1228 };
1229
1230private:
1231 typedef unsigned char OpcodeTy;
1232 OpcodeTy Opcode;
1233
1234 /// An optional name that can be used for the generated IR instruction.
1235 const std::string Name;
1236
1237 /// Returns true if this VPInstruction generates scalar values for all lanes.
1238 /// Most VPInstructions generate a single value per part, either vector or
1239 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1240 /// values per all lanes, stemming from an original ingredient. This method
1241 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1242 /// underlying ingredient.
1243 bool doesGeneratePerAllLanes() const;
1244
1245 /// Returns true if we can generate a scalar for the first lane only if
1246 /// needed.
1247 bool canGenerateScalarForFirstLane() const;
1248
1249 /// Utility methods serving execute(): generates a single vector instance of
1250 /// the modeled instruction. \returns the generated value. In some cases an
1251 /// existing value is returned rather than a generated one.
1252 Value *generate(VPTransformState &State);
1253
1254 /// Utility methods serving execute(): generates a scalar single instance of
1255 /// the modeled instruction for a given lane. \returns the scalar generated
1256 /// value for lane \p Lane.
1257 Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
1258
1259#if !defined(NDEBUG)
1260 /// Return true if the VPInstruction is a floating point math operation, i.e.
1261 /// has fast-math flags.
1262 bool isFPMathOp() const;
1263#endif
1264
1265public:
1267 const Twine &Name = "")
1268 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1269 Opcode(Opcode), Name(Name.str()) {}
1270
1271 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1272 DebugLoc DL = {}, const Twine &Name = "")
1274
1275 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1276 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1277
1278 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1279 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1280 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1281 Opcode(Opcode), Name(Name.str()) {}
1282
/// Create a VPInstruction that records the disjoint flag \p DisjointFlag.
/// Only Instruction::Or is accepted as \p Opcode; this is checked by the
/// assertion in the constructor body.
1283 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1284 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1285 const Twine &Name = "")
1286 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1287 Opcode(Opcode), Name(Name.str()) {
1288 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1289 }
1290
1292 DebugLoc DL = {}, const Twine &Name = "")
1293 : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
1294 ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
1295 Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
1296
1297 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1298 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1299
1300 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1301
1302 VPInstruction *clone() override {
1304 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1305 New->transferFlags(*this);
1306 return New;
1307 }
1308
1309 unsigned getOpcode() const { return Opcode; }
1310
1311 /// Generate the instruction.
1312 /// TODO: We currently execute only per-part unless a specific instance is
1313 /// provided.
1314 void execute(VPTransformState &State) override;
1315
1316 /// Return the cost of this VPInstruction.
1318 VPCostContext &Ctx) const override {
1319 // TODO: Compute accurate cost after retiring the legacy cost model.
1320 return 0;
1321 }
1322
1323#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1324 /// Print the VPInstruction to \p O.
1325 void print(raw_ostream &O, const Twine &Indent,
1326 VPSlotTracker &SlotTracker) const override;
1327
1328 /// Print the VPInstruction to dbgs() (for debugging).
1329 LLVM_DUMP_METHOD void dump() const;
1330#endif
1331
1332 bool hasResult() const {
1333 // CallInst may or may not have a result, depending on the called function.
1334 // Conservatively return calls have results for now.
1335 switch (getOpcode()) {
1336 case Instruction::Ret:
1337 case Instruction::Br:
1338 case Instruction::Store:
1339 case Instruction::Switch:
1340 case Instruction::IndirectBr:
1341 case Instruction::Resume:
1342 case Instruction::CatchRet:
1343 case Instruction::Unreachable:
1344 case Instruction::Fence:
1345 case Instruction::AtomicRMW:
1348 return false;
1349 default:
1350 return true;
1351 }
1352 }
1353
1354 /// Returns true if the recipe only uses the first lane of operand \p Op.
1355 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1356
1357 /// Returns true if the recipe only uses the first part of operand \p Op.
1358 bool onlyFirstPartUsed(const VPValue *Op) const override;
1359
1360 /// Returns true if this VPInstruction produces a scalar value from a vector,
1361 /// e.g. by performing a reduction or extracting a lane.
1362 bool isVectorToScalar() const;
1363
1364 /// Returns true if this VPInstruction's operands are single scalars and the
1365 /// result is also a single scalar.
1366 bool isSingleScalar() const;
1367
1368 /// Returns the symbolic name assigned to the VPInstruction.
1369 StringRef getName() const { return Name; }
1370};
1371
1372 /// A recipe to wrap an original IR instruction not to be modified during
1373 /// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
1374 /// and it is used to add a new incoming value for the single predecessor VPBB.
1375 /// Except for PHIs, VPIRInstructions cannot have any operands.
1377 Instruction &I;
1378
1379public:
1381 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1382
1383 ~VPIRInstruction() override = default;
1384
1385 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1386
1388 auto *R = new VPIRInstruction(I);
1389 for (auto *Op : operands())
1390 R->addOperand(Op);
1391 return R;
1392 }
1393
1394 void execute(VPTransformState &State) override;
1395
1396 /// Return the cost of this VPIRInstruction.
1398 VPCostContext &Ctx) const override;
1399
1400 Instruction &getInstruction() const { return I; }
1401
1402#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1403 /// Print the recipe.
1404 void print(raw_ostream &O, const Twine &Indent,
1405 VPSlotTracker &SlotTracker) const override;
1406#endif
1407
1408 bool usesScalars(const VPValue *Op) const override {
1410 "Op must be an operand of the recipe");
1411 return true;
1412 }
1413
1414 bool onlyFirstPartUsed(const VPValue *Op) const override {
1416 "Op must be an operand of the recipe");
1417 return true;
1418 }
1419};
1420
1421/// VPWidenRecipe is a recipe for producing a widened instruction using the
1422/// opcode and operands of the recipe. This recipe covers most of the
1423/// traditional vectorization cases where each recipe transforms into a
1424/// vectorized version of itself.
1426 unsigned Opcode;
1427
1428protected:
1429 template <typename IterT>
1430 VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1432 : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1433
1434public:
1435 template <typename IterT>
1437 : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1438
1439 ~VPWidenRecipe() override = default;
1440
/// Create a new VPWidenRecipe from the same underlying instruction and the
/// same operands, then call transferFlags(*this) on the new recipe.
/// NOTE(review): presumably this keeps the flags currently recorded on this
/// recipe rather than those re-read from the underlying instruction; confirm
/// against transferFlags.
1441 VPWidenRecipe *clone() override {
1442 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
1443 R->transferFlags(*this);
1444 return R;
1445 }
1446
/// Method to support type inquiry through isa, cast, and dyn_cast. Matches
/// both VPWidenSC and VPWidenEVLSC, since VPWidenEVLRecipe is constructed
/// through the VPWidenRecipe constructor with VPDef::VPWidenEVLSC.
1447 static inline bool classof(const VPRecipeBase *R) {
1448 return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1449 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1450 }
1451
1452 static inline bool classof(const VPUser *U) {
1453 auto *R = dyn_cast<VPRecipeBase>(U);
1454 return R && classof(R);
1455 }
1456
1457 /// Produce a widened instruction using the opcode and operands of the recipe,
1458 /// processing State.VF elements.
1459 void execute(VPTransformState &State) override;
1460
1461 /// Return the cost of this VPWidenRecipe.
1463 VPCostContext &Ctx) const override;
1464
1465 unsigned getOpcode() const { return Opcode; }
1466
1467#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1468 /// Print the recipe.
1469 void print(raw_ostream &O, const Twine &Indent,
1470 VPSlotTracker &SlotTracker) const override;
1471#endif
1472};
1473
1474/// A recipe for widening operations with vector-predication intrinsics with
1475/// explicit vector length (EVL).
1478
1479public:
1480 template <typename IterT>
1482 : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1483 addOperand(&EVL);
1484 }
1486 : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1487 transferFlags(W);
1488 }
1489
1490 ~VPWidenEVLRecipe() override = default;
1491
/// Cloning is not supported for this recipe: calling this override hits
/// llvm_unreachable.
1492 VPWidenRecipe *clone() override final {
1493 llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
// Placed after llvm_unreachable, so not expected to execute.
1494 return nullptr;
1495 }
1496
1497 VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1498
1500 const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1501
1502 /// Produce a vp-intrinsic using the opcode and operands of the recipe,
1503 /// processing EVL elements.
1504 void execute(VPTransformState &State) override final;
1505
1506 /// Returns true if the recipe only uses the first lane of operand \p Op.
1507 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1509 "Op must be an operand of the recipe");
1510 // EVL in that recipe is always the last operand, thus any use before means
1511 // the VPValue should be vectorized.
1512 return getEVL() == Op;
1513 }
1514
1515#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1516 /// Print the recipe.
1517 void print(raw_ostream &O, const Twine &Indent,
1518 VPSlotTracker &SlotTracker) const override final;
1519#endif
1520};
1521
1522/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1524 /// Cast instruction opcode.
1525 Instruction::CastOps Opcode;
1526
1527 /// Result type for the cast.
1528 Type *ResultTy;
1529
1530public:
1532 CastInst &UI)
1533 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1534 ResultTy(ResultTy) {
1535 assert(UI.getOpcode() == Opcode &&
1536 "opcode of underlying cast doesn't match");
1537 }
1538
1540 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1541 ResultTy(ResultTy) {}
1542
1543 ~VPWidenCastRecipe() override = default;
1544
1546 if (auto *UV = getUnderlyingValue())
1547 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1548 *cast<CastInst>(UV));
1549
1550 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1551 }
1552
1553 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1554
1555 /// Produce widened copies of the cast.
1556 void execute(VPTransformState &State) override;
1557
1558 /// Return the cost of this VPWidenCastRecipe.
1560 VPCostContext &Ctx) const override;
1561
1562#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1563 /// Print the recipe.
1564 void print(raw_ostream &O, const Twine &Indent,
1565 VPSlotTracker &SlotTracker) const override;
1566#endif
1567
1568 Instruction::CastOps getOpcode() const { return Opcode; }
1569
1570 /// Returns the result type of the cast.
1571 Type *getResultType() const { return ResultTy; }
1572};
1573
1574/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1576 Instruction::CastOps Opcode;
1577
1578 Type *ResultTy;
1579
1580 Value *generate(VPTransformState &State);
1581
1582public:
1584 DebugLoc DL)
1585 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}, DL), Opcode(Opcode),
1586 ResultTy(ResultTy) {}
1587
1588 ~VPScalarCastRecipe() override = default;
1589
1591 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy,
1592 getDebugLoc());
1593 }
1594
1595 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1596
1597 void execute(VPTransformState &State) override;
1598
1599 /// Return the cost of this VPScalarCastRecipe.
1601 VPCostContext &Ctx) const override {
1602 // TODO: Compute accurate cost after retiring the legacy cost model.
1603 return 0;
1604 }
1605
1606#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1607 void print(raw_ostream &O, const Twine &Indent,
1608 VPSlotTracker &SlotTracker) const override;
1609#endif
1610
1611 /// Returns the result type of the cast.
1612 Type *getResultType() const { return ResultTy; }
1613
1614 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1615 // At the moment, only uniform codegen is implemented.
1617 "Op must be an operand of the recipe");
1618 return true;
1619 }
1620};
1621
1622/// A recipe for widening vector intrinsics.
1624 /// ID of the vector intrinsic to widen.
1625 Intrinsic::ID VectorIntrinsicID;
1626
1627 /// Scalar return type of the intrinsic.
1628 Type *ResultTy;
1629
1630 /// True if the intrinsic may read from memory.
1631 bool MayReadFromMemory;
1632
1633 /// True if the intrinsic may write to memory.
1634 bool MayWriteToMemory;
1635
1636 /// True if the intrinsic may have side-effects.
1637 bool MayHaveSideEffects;
1638
1639public:
1641 ArrayRef<VPValue *> CallArguments, Type *Ty,
1642 DebugLoc DL = {})
1643 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1644 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1645 MayReadFromMemory(CI.mayReadFromMemory()),
1646 MayWriteToMemory(CI.mayWriteToMemory()),
1647 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1648
1650 ArrayRef<VPValue *> CallArguments, Type *Ty,
1651 DebugLoc DL = {})
1652 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1653 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
// No underlying call is passed to this constructor, so the memory behavior
// is derived from the attributes of the intrinsic itself.
1654 LLVMContext &Ctx = Ty->getContext();
1655 AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
1656 MemoryEffects ME = Attrs.getMemoryEffects();
// The intrinsic may read unless it is known to only write, and may write
// unless it is known to only read.
1657 MayReadFromMemory = !ME.onlyWritesMemory();
1658 MayWriteToMemory = !ME.onlyReadsMemory();
// Writing memory, possibly unwinding, or possibly not returning all count
// as side effects.
1659 MayHaveSideEffects = MayWriteToMemory ||
1660 !Attrs.hasFnAttr(Attribute::NoUnwind) ||
1661 !Attrs.hasFnAttr(Attribute::WillReturn);
1662 }
1663
1665 std::initializer_list<VPValue *> CallArguments,
1666 Type *Ty, DebugLoc DL = {})
1667 : VPWidenIntrinsicRecipe(VectorIntrinsicID,
1668 ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
1669
1670 ~VPWidenIntrinsicRecipe() override = default;
1671
1673 return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
1674 VectorIntrinsicID, {op_begin(), op_end()},
1675 ResultTy, getDebugLoc());
1676 }
1677
1678 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1679
1680 /// Produce a widened version of the vector intrinsic.
1681 void execute(VPTransformState &State) override;
1682
1683 /// Return the cost of this vector intrinsic.
1685 VPCostContext &Ctx) const override;
1686
1687 /// Return the ID of the intrinsic.
1688 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
1689
1690 /// Return the scalar return type of the intrinsic.
1691 Type *getResultType() const { return ResultTy; }
1692
1693 /// Return the name of the intrinsic as string.
1695
1696 /// Returns true if the intrinsic may read from memory.
1697 bool mayReadFromMemory() const { return MayReadFromMemory; }
1698
1699 /// Returns true if the intrinsic may write to memory.
1700 bool mayWriteToMemory() const { return MayWriteToMemory; }
1701
1702 /// Returns true if the intrinsic may have side-effects.
1703 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1704
1705#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1706 /// Print the recipe.
1707 void print(raw_ostream &O, const Twine &Indent,
1708 VPSlotTracker &SlotTracker) const override;
1709#endif
1710
1711 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1712};
1713
1714/// A recipe for widening Call instructions using library calls.
1716 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1717 /// between a given VF and the chosen vectorized variant, so there will be a
1718 /// different VPlan for each VF with a valid variant.
1719 Function *Variant;
1720
1721public:
1723 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1724 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1725 *cast<Instruction>(UV)),
1726 Variant(Variant) {
1727 assert(
1728 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1729 "last operand must be the called function");
1730 }
1731
1732 ~VPWidenCallRecipe() override = default;
1733
1735 return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1736 {op_begin(), op_end()}, getDebugLoc());
1737 }
1738
1739 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1740
1741 /// Produce a widened version of the call instruction.
1742 void execute(VPTransformState &State) override;
1743
1744 /// Return the cost of this VPWidenCallRecipe.
1746 VPCostContext &Ctx) const override;
1747
1749 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1750 }
1751
1753 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1754 }
1756 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1757 }
1758
1759#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1760 /// Print the recipe.
1761 void print(raw_ostream &O, const Twine &Indent,
1762 VPSlotTracker &SlotTracker) const override;
1763#endif
1764};
1765
1766/// A recipe representing a sequence of load -> update -> store as part of
1767/// a histogram operation. This means there may be aliasing between vector
1768/// lanes, which is handled by the llvm.experimental.vector.histogram family
1769/// of intrinsics. The only update operations currently supported are
1770/// 'add' and 'sub' where the other term is loop-invariant.
1772 /// Opcode of the update operation, currently either add or sub.
1773 unsigned Opcode;
1774
1775public:
1776 template <typename IterT>
1777 VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
1778 DebugLoc DL = {})
1779 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1780
1781 ~VPHistogramRecipe() override = default;
1782
1784 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1785 }
1786
1787 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1788
1789 /// Produce a vectorized histogram operation.
1790 void execute(VPTransformState &State) override;
1791
1792 /// Return the cost of this VPHistogramRecipe.
1794 VPCostContext &Ctx) const override;
1795
1796 unsigned getOpcode() const { return Opcode; }
1797
1798 /// Return the mask operand if one was provided, or a null pointer if all
1799 /// lanes should be executed unconditionally.
1800 VPValue *getMask() const {
1801 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1802 }
1803
1804#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1805 /// Print the recipe
1806 void print(raw_ostream &O, const Twine &Indent,
1807 VPSlotTracker &SlotTracker) const override;
1808#endif
1809};
1810
1811/// A recipe for widening select instructions.
1813 template <typename IterT>
1815 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1816 I.getDebugLoc()) {}
1817
1818 ~VPWidenSelectRecipe() override = default;
1819
1821 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1822 operands());
1823 }
1824
1825 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1826
1827 /// Produce a widened version of the select instruction.
1828 void execute(VPTransformState &State) override;
1829
1830 /// Return the cost of this VPWidenSelectRecipe.
1832 VPCostContext &Ctx) const override;
1833
1834#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1835 /// Print the recipe.
1836 void print(raw_ostream &O, const Twine &Indent,
1837 VPSlotTracker &SlotTracker) const override;
1838#endif
1839
1840 VPValue *getCond() const {
1841 return getOperand(0);
1842 }
1843
1844 bool isInvariantCond() const {
1846 }
1847};
1848
1849/// A recipe for handling GEP instructions.
1851 bool isPointerLoopInvariant() const {
1853 }
1854
1855 bool isIndexLoopInvariant(unsigned I) const {
1857 }
1858
/// Returns true if every operand of the recipe reports
/// isDefinedOutsideLoopRegions().
1859 bool areAllOperandsInvariant() const {
1860 return all_of(operands(), [](VPValue *Op) {
1861 return Op->isDefinedOutsideLoopRegions();
1862 });
1863 }
1864
1865public:
1866 template <typename IterT>
1868 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1869
1870 ~VPWidenGEPRecipe() override = default;
1871
1873 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1874 operands());
1875 }
1876
1877 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1878
1879 /// Generate the gep nodes.
1880 void execute(VPTransformState &State) override;
1881
1882 /// Return the cost of this VPWidenGEPRecipe.
1884 VPCostContext &Ctx) const override {
1885 // TODO: Compute accurate cost after retiring the legacy cost model.
1886 return 0;
1887 }
1888
1889#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1890 /// Print the recipe.
1891 void print(raw_ostream &O, const Twine &Indent,
1892 VPSlotTracker &SlotTracker) const override;
1893#endif
1894};
1895
1896 /// A recipe to compute the pointers for widened memory accesses of IndexedTy
1897/// in reverse order.
1899 public VPUnrollPartAccessor<2> {
1900 Type *IndexedTy;
1901
1902public:
1905 : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1906 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1907 IndexedTy(IndexedTy) {}
1908
1909 VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1910
1912 const VPValue *getVFValue() const { return getOperand(1); }
1913
1914 void execute(VPTransformState &State) override;
1915
1916 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1918 "Op must be an operand of the recipe");
1919 return true;
1920 }
1921
1922 /// Return the cost of this VPReverseVectorPointerRecipe.
1924 VPCostContext &Ctx) const override {
1925 // TODO: Compute accurate cost after retiring the legacy cost model.
1926 return 0;
1927 }
1928
1929 /// Returns true if the recipe only uses the first part of operand \p Op.
1930 bool onlyFirstPartUsed(const VPValue *Op) const override {
1932 "Op must be an operand of the recipe");
1933 assert(getNumOperands() <= 2 && "must have at most two operands");
1934 return true;
1935 }
1936
1939 IndexedTy, getGEPNoWrapFlags(),
1940 getDebugLoc());
1941 }
1942
1943#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1944 /// Print the recipe.
1945 void print(raw_ostream &O, const Twine &Indent,
1946 VPSlotTracker &SlotTracker) const override;
1947#endif
1948};
1949
1950 /// A recipe to compute the pointers for widened memory accesses of IndexedTy.
1952 public VPUnrollPartAccessor<1> {
1953 Type *IndexedTy;
1954
1955public:
1957 DebugLoc DL)
1958 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1959 GEPFlags, DL),
1960 IndexedTy(IndexedTy) {}
1961
1962 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1963
1964 void execute(VPTransformState &State) override;
1965
1966 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1968 "Op must be an operand of the recipe");
1969 return true;
1970 }
1971
1972 /// Returns true if the recipe only uses the first part of operand \p Op.
1973 bool onlyFirstPartUsed(const VPValue *Op) const override {
1975 "Op must be an operand of the recipe");
1976 assert(getNumOperands() <= 2 && "must have at most two operands");
1977 return true;
1978 }
1979
1981 return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
1983 }
1984
1985 /// Return the cost of this VPVectorPointerRecipe.
1987 VPCostContext &Ctx) const override {
1988 // TODO: Compute accurate cost after retiring the legacy cost model.
1989 return 0;
1990 }
1991
1992#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1993 /// Print the recipe.
1994 void print(raw_ostream &O, const Twine &Indent,
1995 VPSlotTracker &SlotTracker) const override;
1996#endif
1997};
1998
1999/// A pure virtual base class for all recipes modeling header phis, including
2000/// phis for first order recurrences, pointer inductions and reductions. The
2001/// start value is the first operand of the recipe and the incoming value from
2002/// the backedge is the second operand.
2003///
2004/// Inductions are modeled using the following sub-classes:
2005/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2006/// starting at a specified value (zero for the main vector loop, the resume
2007/// value for the epilogue vector loop) and stepping by 1. The induction
2008/// controls exiting of the vector loop by comparing against the vector trip
2009/// count. Produces a single scalar PHI for the induction value per
2010/// iteration.
2011/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2012/// floating point inductions with arbitrary start and step values. Produces
2013/// a vector PHI per-part.
2014/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2015/// value of an IV with different start and step values. Produces a single
2016/// scalar value per iteration.
2017/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2018/// canonical or derived induction.
2019/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2020/// pointer induction. Produces either a vector PHI per-part or scalar values
2021/// per-lane based on the canonical induction.
2023protected:
/// Create a header phi recipe with ID \p VPDefID for \p UnderlyingInstr. The
/// recipe is created without operands; \p Start is added as the first operand
/// only when it is non-null.
2024 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2025 VPValue *Start = nullptr, DebugLoc DL = {})
2026 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
2027 if (Start)
2028 addOperand(Start);
2029 }
2030
2031public:
2032 ~VPHeaderPHIRecipe() override = default;
2033
2034 /// Method to support type inquiry through isa, cast, and dyn_cast.
2035 static inline bool classof(const VPRecipeBase *B) {
2036 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2037 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2038 }
/// Type inquiry for VPValues: true if \p V has a defining recipe whose ID
/// lies in the [VPFirstHeaderPHISC, VPLastHeaderPHISC] range. A value without
/// a defining recipe yields false.
2039 static inline bool classof(const VPValue *V) {
2040 auto *B = V->getDefiningRecipe();
2041 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2042 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2043 }
2044
2045 /// Generate the phi nodes.
2046 void execute(VPTransformState &State) override = 0;
2047
2048 /// Return the cost of this header phi recipe.
2050 VPCostContext &Ctx) const override;
2051
2052#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2053 /// Print the recipe.
2054 void print(raw_ostream &O, const Twine &Indent,
2055 VPSlotTracker &SlotTracker) const override = 0;
2056#endif
2057
2058 /// Returns the start value of the phi, if one is set.
2060 return getNumOperands() == 0 ? nullptr : getOperand(0);
2061 }
2063 return getNumOperands() == 0 ? nullptr : getOperand(0);
2064 }
2065
2066 /// Update the start value of the recipe.
2068
2069 /// Returns the incoming value from the loop backedge.
2071 return getOperand(1);
2072 }
2073
2074 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2075 /// to be a recipe.
2078 }
2079};
2080
2081/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2082/// VPWidenPointerInductionRecipe), providing shared functionality, including
2083/// retrieving the step value, induction descriptor and original phi node.
2085 const InductionDescriptor &IndDesc;
2086
2087public:
/// Create a widened induction recipe of kind \p Kind for \p IV. \p Start is
/// forwarded to the VPHeaderPHIRecipe constructor, \p Step is then appended
/// as a further operand, and \p IndDesc is kept by reference (so the
/// descriptor has to outlive the recipe).
2088 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2089 VPValue *Step, const InductionDescriptor &IndDesc,
2090 DebugLoc DL)
2091 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2092 addOperand(Step);
2093 }
2094
/// Method to support type inquiry through isa, cast, and dyn_cast: true if
/// the ID of \p R is VPWidenIntOrFpInductionSC or VPWidenPointerInductionSC.
2095 static inline bool classof(const VPRecipeBase *R) {
2096 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2097 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2098 }
2099
/// Type inquiry for VPValues: true if \p V has a defining recipe and that
/// recipe passes the classof check on its recipe ID. A value without a
/// defining recipe yields false.
2100 static inline bool classof(const VPValue *V) {
2101 auto *R = V->getDefiningRecipe();
2102 return R && classof(R);
2103 }
2104
/// Type inquiry starting from a header phi recipe: casts \p R to
/// const VPRecipeBase * and forwards to the VPRecipeBase overload.
2105 static inline bool classof(const VPHeaderPHIRecipe *R) {
2106 return classof(static_cast<const VPRecipeBase *>(R));
2107 }
2108
2109 virtual void execute(VPTransformState &State) override = 0;
2110
2111 /// Returns the step value of the induction.
2113 const VPValue *getStepValue() const { return getOperand(1); }
2114
/// Returns the underlying value of this recipe, cast to a PHINode.
2115 PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
2116
2117 /// Returns the induction descriptor for the recipe.
2118 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2119
2121 // TODO: All operands of base recipe must exist and be at same index in
2122 // derived recipe.
2124 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2125 }
2126
2128 // TODO: All operands of base recipe must exist and be at same index in
2129 // derived recipe.
2131 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2132 }
2133};
2134
2135/// A recipe for handling phi nodes of integer and floating-point inductions,
2136/// producing their vector values.
2138 TruncInst *Trunc;
2139
2140public:
2142 VPValue *VF, const InductionDescriptor &IndDesc,
2143 DebugLoc DL)
2144 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2145 Step, IndDesc, DL),
2146 Trunc(nullptr) {
2147 addOperand(VF);
2148 }
2149
2151 VPValue *VF, const InductionDescriptor &IndDesc,
2152 TruncInst *Trunc, DebugLoc DL)
2153 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2154 Step, IndDesc, DL),
2155 Trunc(Trunc) {
2156 addOperand(VF);
2157 }
2158
2160
2165 }
2166
2167 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2168
2169 /// Generate the vectorized and scalarized versions of the phi node as
2170 /// needed by their users.
2171 void execute(VPTransformState &State) override;
2172
2173#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2174 /// Print the recipe.
2175 void print(raw_ostream &O, const Twine &Indent,
2176 VPSlotTracker &SlotTracker) const override;
2177#endif
2178
/// Const accessor for the VF value, read from operand index 2.
2180 const VPValue *getVFValue() const { return getOperand(2); }
2181
2183 // If the recipe has been unrolled (5 operands), return the VPValue for the
2184 // induction increment.
2185 return getNumOperands() == 5 ? getOperand(3) : nullptr;
2186 }
2187
2188 /// Returns the first defined value as TruncInst, if it is one or nullptr
2189 /// otherwise.
2190 TruncInst *getTruncInst() { return Trunc; }
2191 const TruncInst *getTruncInst() const { return Trunc; }
2192
2193 /// Returns true if the induction is canonical, i.e. starting at 0 and
2194 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2195 /// same type as the canonical induction.
2196 bool isCanonical() const;
2197
2198 /// Returns the scalar type of the induction.
2200 return Trunc ? Trunc->getType() : getPHINode()->getType();
2201 }
2202
2203 /// Returns the VPValue representing the value of this induction at
2204 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2205 /// take place.
2207 return getNumOperands() == 5 ? getOperand(4) : this;
2208 }
2209};
2210
2212 public VPUnrollPartAccessor<3> {
2213 bool IsScalarAfterVectorization;
2214
2215public:
2216 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2217 /// Start.
2219 const InductionDescriptor &IndDesc,
2220 bool IsScalarAfterVectorization, DebugLoc DL)
2221 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2222 Step, IndDesc, DL),
2223 IsScalarAfterVectorization(IsScalarAfterVectorization) {}
2224
2226
2229 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2230 getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
2231 }
2232
2233 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2234
2235 /// Generate vector values for the pointer induction.
2236 void execute(VPTransformState &State) override;
2237
2238 /// Returns true if only scalar values will be generated.
2239 bool onlyScalarsGenerated(bool IsScalable);
2240
2241 /// Returns the VPValue representing the value of this induction at
2242 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2243 /// take place.
2245 return getUnrollPart(*this) == 0 ? this : getOperand(2);
2246 }
2247
2248#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2249 /// Print the recipe.
2250 void print(raw_ostream &O, const Twine &Indent,
2251 VPSlotTracker &SlotTracker) const override;
2252#endif
2253};
2254
2255/// Recipe to generate a scalar PHI. Used to generate code for recipes that
2256/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
2257/// VPEVLBasedIVPHIRecipe.
2259 std::string Name;
2260
2261public:
/// Create a scalar header phi named \p Name without an underlying
/// instruction. \p Start is forwarded to the VPHeaderPHIRecipe constructor and
/// \p BackedgeValue is appended afterwards as a further operand.
2262 VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
2263 StringRef Name)
2264 : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
2265 Name(Name.str()) {
2266 addOperand(BackedgeValue);
2267 }
2268
2269 ~VPScalarPHIRecipe() override = default;
2270
2272 llvm_unreachable("cloning not implemented yet");
2273 }
2274
2275 VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
2276
2277 /// Generate the phi/select nodes.
2278 void execute(VPTransformState &State) override;
2279
2280 /// Returns true if the recipe only uses the first lane of operand \p Op.
2281 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2283 "Op must be an operand of the recipe");
2284 return true;
2285 }
2286
2287#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2288 /// Print the recipe.
2289 void print(raw_ostream &O, const Twine &Indent,
2290 VPSlotTracker &SlotTracker) const override;
2291#endif
2292};
2293
2294/// A recipe for handling phis that are widened in the vector loop.
2295/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
2296/// managed in the recipe directly.
2298 /// List of incoming blocks. Only used in the VPlan native path.
2299 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
2300
2301public:
2302 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
2303 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
2304 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
2305 if (Start)
2306 addOperand(Start);
2307 }
2308
2310 llvm_unreachable("cloning not implemented yet");
2311 }
2312
2313 ~VPWidenPHIRecipe() override = default;
2314
2315 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2316
2317 /// Generate the phi/select nodes.
2318 void execute(VPTransformState &State) override;
2319
2320#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2321 /// Print the recipe.
2322 void print(raw_ostream &O, const Twine &Indent,
2323 VPSlotTracker &SlotTracker) const override;
2324#endif
2325
2326 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
2327 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
2328 addOperand(IncomingV);
2329 IncomingBlocks.push_back(IncomingBlock);
2330 }
2331
2332 /// Returns the \p I th incoming VPBasicBlock.
2333 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
2334
2335 /// Returns the \p I th incoming VPValue.
2336 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
2337};
2338
2339/// A recipe for handling first-order recurrence phis. The start value is the
2340/// first operand of the recipe and the incoming value from the backedge is the
2341/// second operand.
2344 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2345
2346 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2347
2349 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
2350 }
2351
2354 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2355 }
2356
2357 void execute(VPTransformState &State) override;
2358
2359 /// Return the cost of this first-order recurrence phi recipe.
2361 VPCostContext &Ctx) const override;
2362
2363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2364 /// Print the recipe.
2365 void print(raw_ostream &O, const Twine &Indent,
2366 VPSlotTracker &SlotTracker) const override;
2367#endif
2368};
2369
2370/// A recipe for handling reduction phis. The start value is the first operand
2371/// of the recipe and the incoming value from the backedge is the second
2372/// operand.
2374 public VPUnrollPartAccessor<2> {
2375 /// Descriptor for the reduction.
2376 const RecurrenceDescriptor &RdxDesc;
2377
2378 /// The phi is part of an in-loop reduction.
2379 bool IsInLoop;
2380
2381 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2382 bool IsOrdered;
2383
2384public:
2385 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2386 /// RdxDesc.
2388 VPValue &Start, bool IsInLoop = false,
2389 bool IsOrdered = false)
2390 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2391 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
2392 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2393 }
2394
2395 ~VPReductionPHIRecipe() override = default;
2396
2398 auto *R =
2399 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
2400 *getOperand(0), IsInLoop, IsOrdered);
2401 R->addOperand(getBackedgeValue());
2402 return R;
2403 }
2404
2405 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2406
2408 return R->getVPDefID() == VPDef::VPReductionPHISC;
2409 }
2410
2411 /// Generate the phi/select nodes.
2412 void execute(VPTransformState &State) override;
2413
2414#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2415 /// Print the recipe.
2416 void print(raw_ostream &O, const Twine &Indent,
2417 VPSlotTracker &SlotTracker) const override;
2418#endif
2419
2421 return RdxDesc;
2422 }
2423
2424 /// Returns true, if the phi is part of an ordered reduction.
2425 bool isOrdered() const { return IsOrdered; }
2426
2427 /// Returns true, if the phi is part of an in-loop reduction.
2428 bool isInLoop() const { return IsInLoop; }
2429};
2430
2431/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2432/// instructions.
2434public:
2435 /// The blend operation is a User of the incoming values and of their
2436 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2437 /// be omitted (implied by passing an odd number of operands) in which case
2438 /// all other incoming values are merged into it.
2440 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2441 assert(Operands.size() > 0 && "Expected at least one operand!");
2442 }
2443
2444 VPBlendRecipe *clone() override {
2446 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2447 }
2448
2449 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2450
2451 /// A normalized blend is one that has an odd number of operands, whereby the
2452 /// first operand does not have an associated mask.
2453 bool isNormalized() const { return getNumOperands() % 2; }
2454
2455 /// Return the number of incoming values, taking into account when normalized
2456 /// the first incoming value will have no mask.
2457 unsigned getNumIncomingValues() const {
2458 return (getNumOperands() + isNormalized()) / 2;
2459 }
2460
2461 /// Return incoming value number \p Idx.
2462 VPValue *getIncomingValue(unsigned Idx) const {
2463 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2464 }
2465
2466 /// Return mask number \p Idx.
2467 VPValue *getMask(unsigned Idx) const {
2468 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2469 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2470 }
2471
2472 /// Generate the phi/select nodes.
2473 void execute(VPTransformState &State) override;
2474
2475 /// Return the cost of this VPBlendRecipe.
2477 VPCostContext &Ctx) const override;
2478
2479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2480 /// Print the recipe.
2481 void print(raw_ostream &O, const Twine &Indent,
2482 VPSlotTracker &SlotTracker) const override;
2483#endif
2484
2485 /// Returns true if the recipe only uses the first lane of operand \p Op.
2486 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2488 "Op must be an operand of the recipe");
2489 // Recursing through Blend recipes only, must terminate at header phis at
2490 // the latest.
2491 return all_of(users(),
2492 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2493 }
2494};
2495
2496/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2497/// or stores into one wide load/store and shuffles. The first operand of a
2498/// VPInterleave recipe is the address, followed by the stored values, followed
2499/// by an optional mask.
2502
2503 /// Indicates if the interleave group is in a conditional block and requires a
2504 /// mask.
2505 bool HasMask = false;
2506
2507 /// Indicates if gaps between members of the group need to be masked out or if
2508 /// unused gaps can be loaded speculatively.
2509 bool NeedsMaskForGaps = false;
2510
2511public:
2513 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2514 bool NeedsMaskForGaps)
2515 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2516 NeedsMaskForGaps(NeedsMaskForGaps) {
2517 for (unsigned i = 0; i < IG->getFactor(); ++i)
2518 if (Instruction *I = IG->getMember(i)) {
2519 if (I->getType()->isVoidTy())
2520 continue;
2521 new VPValue(I, this);
2522 }
2523
2524 for (auto *SV : StoredValues)
2525 addOperand(SV);
2526 if (Mask) {
2527 HasMask = true;
2528 addOperand(Mask);
2529 }
2530 }
2531 ~VPInterleaveRecipe() override = default;
2532
2534 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2535 NeedsMaskForGaps);
2536 }
2537
2538 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2539
2540 /// Return the address accessed by this recipe.
2541 VPValue *getAddr() const {
2542 return getOperand(0); // Address is the 1st, mandatory operand.
2543 }
2544
2545 /// Return the mask used by this recipe. Note that a full mask is represented
2546 /// by a nullptr.
2547 VPValue *getMask() const {
2548 // Mask is optional and, when present, is always the last operand.
2549 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2550 }
2551
2552 /// Return the VPValues stored by this interleave group. If it is a load
2553 /// interleave group, return an empty ArrayRef.
2555 // The first operand is the address, followed by the stored values, followed
2556 // by an optional mask.
2559 }
2560
2561 /// Generate the wide load or store, and shuffles.
2562 void execute(VPTransformState &State) override;
2563
2564 /// Return the cost of this VPInterleaveRecipe.
2566 VPCostContext &Ctx) const override;
2567
2568#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2569 /// Print the recipe.
2570 void print(raw_ostream &O, const Twine &Indent,
2571 VPSlotTracker &SlotTracker) const override;
2572#endif
2573
2575
2576 /// Returns the number of stored operands of this interleave group. Returns 0
2577 /// for load interleave groups.
2578 unsigned getNumStoreOperands() const {
2579 return getNumOperands() - (HasMask ? 2 : 1);
2580 }
2581
2582 /// The recipe only uses the first lane of the address.
2583 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2585 "Op must be an operand of the recipe");
2586 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2587 }
2588
/// Returns the insert position reported by the interleave group
/// (forwards to IG->getInsertPos()).
2589 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2590};
2591
2592/// A recipe to represent inloop reduction operations, performing a reduction on
2593/// a vector operand into a scalar value, and adding the result to a chain.
2594/// The Operands are {ChainOp, VecOp, [Condition]}.
2596 /// The recurrence descriptor for the reduction in question.
2597 const RecurrenceDescriptor &RdxDesc;
2598 bool IsOrdered;
2599 /// Whether the reduction is conditional.
2600 bool IsConditional = false;
2601
2602protected:
2603 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2605 VPValue *CondOp, bool IsOrdered, DebugLoc DL)
2606 : VPSingleDefRecipe(SC, Operands, I, DL), RdxDesc(R),
2607 IsOrdered(IsOrdered) {
2608 if (CondOp) {
2609 IsConditional = true;
2610 addOperand(CondOp);
2611 }
2612 }
2613
2614public:
2616 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2617 bool IsOrdered, DebugLoc DL = {})
2618 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2619 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2620 IsOrdered, DL) {}
2621
2622 ~VPReductionRecipe() override = default;
2623
2625 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2626 getVecOp(), getCondOp(), IsOrdered,
2627 getDebugLoc());
2628 }
2629
/// Method to support type inquiry through isa, cast, and dyn_cast: true if
/// the ID of \p R is VPReductionSC or VPReductionEVLSC.
2630 static inline bool classof(const VPRecipeBase *R) {
2631 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2632 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2633 }
2634
/// Type inquiry for VPUsers: true if \p U is a VPRecipeBase (checked with
/// dyn_cast) and that recipe passes the classof check on its recipe ID.
2635 static inline bool classof(const VPUser *U) {
2636 auto *R = dyn_cast<VPRecipeBase>(U);
2637 return R && classof(R);
2638 }
2639
2640 /// Generate the reduction in the loop
2641 void execute(VPTransformState &State) override;
2642
2643 /// Return the cost of VPReductionRecipe.
2645 VPCostContext &Ctx) const override;
2646
2647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2648 /// Print the recipe.
2649 void print(raw_ostream &O, const Twine &Indent,
2650 VPSlotTracker &SlotTracker) const override;
2651#endif
2652
2653 /// Return the recurrence descriptor for the in-loop reduction.
2655 return RdxDesc;
2656 }
2657 /// Return true if the in-loop reduction is ordered.
2658 bool isOrdered() const { return IsOrdered; }
2659 /// Return true if the in-loop reduction is conditional.
2660 bool isConditional() const { return IsConditional; }
2661 /// The VPValue of the scalar Chain being accumulated.
2662 VPValue *getChainOp() const { return getOperand(0); }
2663 /// The VPValue of the vector value to be reduced.
2664 VPValue *getVecOp() const { return getOperand(1); }
2665 /// The VPValue of the condition for the block.
2667 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2668 }
2669};
2670
2671/// A recipe to represent inloop reduction operations with vector-predication
2672/// intrinsics, performing a reduction on a vector operand with the explicit
2673/// vector length (EVL) into a scalar value, and adding the result to a chain.
2674/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2676public:
2679 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2681 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2682 R.isOrdered(), R.getDebugLoc()) {}
2683
2684 ~VPReductionEVLRecipe() override = default;
2685
2687 llvm_unreachable("cloning not implemented yet");
2688 }
2689
2690 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2691
2692 /// Generate the reduction in the loop
2693 void execute(VPTransformState &State) override;
2694
2695#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2696 /// Print the recipe.
2697 void print(raw_ostream &O, const Twine &Indent,
2698 VPSlotTracker &SlotTracker) const override;
2699#endif
2700
2701 /// The VPValue of the explicit vector length.
2702 VPValue *getEVL() const { return getOperand(2); }
2703
2704 /// Returns true if the recipe only uses the first lane of operand \p Op.
2705 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2707 "Op must be an operand of the recipe");
2708 return Op == getEVL();
2709 }
2710};
2711
2712/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2713/// copies of the original scalar type, one per lane, instead of producing a
2714/// single copy of widened type for all lanes. If the instruction is known to be
2715/// uniform only one copy, per lane zero, will be generated.
2717 /// Indicator if only a single replica, for lane zero, is needed.
2718 bool IsUniform;
2719
2720 /// Indicator if the replicas are also predicated.
2721 bool IsPredicated;
2722
2723public:
2724 template <typename IterT>
2726 bool IsUniform, VPValue *Mask = nullptr)
2727 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2728 IsUniform(IsUniform), IsPredicated(Mask) {
2729 if (Mask)
2730 addOperand(Mask);
2731 }
2732
2733 ~VPReplicateRecipe() override = default;
2734
2736 auto *Copy =
2737 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2738 isPredicated() ? getMask() : nullptr);
2739 Copy->transferFlags(*this);
2740 return Copy;
2741 }
2742
2743 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2744
2745 /// Generate replicas of the desired Ingredient. Replicas will be generated
2746 /// for all parts and lanes unless a specific part and lane are specified in
2747 /// the \p State.
2748 void execute(VPTransformState &State) override;
2749
2750 /// Return the cost of this VPReplicateRecipe.
2752 VPCostContext &Ctx) const override;
2753
2754#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2755 /// Print the recipe.
2756 void print(raw_ostream &O, const Twine &Indent,
2757 VPSlotTracker &SlotTracker) const override;
2758#endif
2759
/// Returns the IsUniform flag that was passed at construction.
2760 bool isUniform() const { return IsUniform; }
2761
/// Returns true if the replicas are predicated, i.e. a non-null mask was
/// passed at construction.
2762 bool isPredicated() const { return IsPredicated; }
2763
2764 /// Returns true if the recipe only uses the first lane of operand \p Op.
2765 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2767 "Op must be an operand of the recipe");
2768 return isUniform();
2769 }
2770
2771 /// Returns true if the recipe uses scalars of operand \p Op.
2772 bool usesScalars(const VPValue *Op) const override {
2774 "Op must be an operand of the recipe");
2775 return true;
2776 }
2777
2778 /// Returns true if the recipe is used by a widened recipe via an intervening
2779 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2780 /// in a vector.
2781 bool shouldPack() const;
2782
2783 /// Return the mask of a predicated VPReplicateRecipe.
2785 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2786 return getOperand(getNumOperands() - 1);
2787 }
2788
/// Returns the opcode of the underlying instruction.
2789 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2790};
2791
2792/// A recipe for generating conditional branches on the bits of a mask.
2794public:
2796 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2797 if (BlockInMask) // nullptr means all-one mask.
2798 addOperand(BlockInMask);
2799 }
2800
2802 return new VPBranchOnMaskRecipe(getOperand(0));
2803 }
2804
2805 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2806
2807 /// Generate the extraction of the appropriate bit from the block mask and the
2808 /// conditional branch.
2809 void execute(VPTransformState &State) override;
2810
2811 /// Return the cost of this VPBranchOnMaskRecipe.
2813 VPCostContext &Ctx) const override;
2814
2815#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2816 /// Print the recipe.
2817 void print(raw_ostream &O, const Twine &Indent,
2818 VPSlotTracker &SlotTracker) const override {
2819 O << Indent << "BRANCH-ON-MASK ";
2820 if (VPValue *Mask = getMask())
2821 Mask->printAsOperand(O, SlotTracker);
2822 else
2823 O << " All-One";
2824 }
2825#endif
2826
2827 /// Return the mask used by this recipe. Note that a full mask is represented
2828 /// by a nullptr.
2829 VPValue *getMask() const {
2830 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2831 // Mask is optional.
2832 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2833 }
2834
2835 /// Returns true if the recipe uses scalars of operand \p Op.
2836 bool usesScalars(const VPValue *Op) const override {
2838 "Op must be an operand of the recipe");
2839 return true;
2840 }
2841};
2842
2843/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2844/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2845/// order to merge values that are set under such a branch and feed their uses.
2846/// The phi nodes can be scalar or vector depending on the users of the value.
2847/// This recipe works in concert with VPBranchOnMaskRecipe.
2849public:
2850 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2851 /// nodes after merging back from a Branch-on-Mask.
2853 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV, DL) {}
2854 ~VPPredInstPHIRecipe() override = default;
2855
2857 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
2858 }
2859
2860 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2861
2862 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2863 /// retain SSA form.
2864 void execute(VPTransformState &State) override;
2865
2866 /// Return the cost of this VPPredInstPHIRecipe.
2868 VPCostContext &Ctx) const override {
2869 // TODO: Compute accurate cost after retiring the legacy cost model.
2870 return 0;
2871 }
2872
2873#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2874 /// Print the recipe.
2875 void print(raw_ostream &O, const Twine &Indent,
2876 VPSlotTracker &SlotTracker) const override;
2877#endif
2878
2879 /// Returns true if the recipe uses scalars of operand \p Op.
2880 bool usesScalars(const VPValue *Op) const override {
2882 "Op must be an operand of the recipe");
2883 return true;
2884 }
2885};
2886
2887/// A common base class for widening memory operations. An optional mask can be
2888/// provided as the last operand.
2890protected:
2892
2893 /// Whether the accessed addresses are consecutive.
2895
2896 /// Whether the consecutive accessed addresses are in reverse order.
2898
2899 /// Whether the memory access is masked.
2900 bool IsMasked = false;
2901
/// Appends \p Mask as an operand and marks the recipe as masked. A null
/// \p Mask is ignored, leaving the recipe unmasked. Must not be called once
/// a mask has been set (asserted).
2902 void setMask(VPValue *Mask) {
2903 assert(!IsMasked && "cannot re-set mask");
2904 if (!Mask)
2905 return;
2906 addOperand(Mask);
2907 IsMasked = true;
2908 }
2909
2910 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2911 std::initializer_list<VPValue *> Operands,
2912 bool Consecutive, bool Reverse, DebugLoc DL)
2914 Reverse(Reverse) {
2915 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2916 }
2917
2918public:
2920 llvm_unreachable("cloning not supported");
2921 }
2922
/// Method to support type inquiry through isa, cast, and dyn_cast: true if
/// the ID of \p R is one of VPWidenLoadSC, VPWidenStoreSC, VPWidenLoadEVLSC
/// or VPWidenStoreEVLSC.
2923 static inline bool classof(const VPRecipeBase *R) {
2924 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2925 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2926 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2927 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2928 }
2929
/// Type inquiry for VPUsers: true if \p U is a VPRecipeBase (checked with
/// dyn_cast) and that recipe passes the classof check on its recipe ID.
2930 static inline bool classof(const VPUser *U) {
2931 auto *R = dyn_cast<VPRecipeBase>(U);
2932 return R && classof(R);
2933 }
2934
2935 /// Return whether the loaded-from / stored-to addresses are consecutive.
2936 bool isConsecutive() const { return Consecutive; }
2937
2938 /// Return whether the consecutive loaded/stored addresses are in reverse
2939 /// order.
2940 bool isReverse() const { return Reverse; }
2941
2942 /// Return the address accessed by this recipe.
2943 VPValue *getAddr() const { return getOperand(0); }
2944
2945 /// Returns true if the recipe is masked.
2946 bool isMasked() const { return IsMasked; }
2947
2948 /// Return the mask used by this recipe. Note that a full mask is represented
2949 /// by a nullptr.
2950 VPValue *getMask() const {
2951 // Mask is optional and therefore the last operand.
2952 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2953 }
2954
2955 /// Generate the wide load/store.
2956 void execute(VPTransformState &State) override {
2957 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2958 }
2959
2960 /// Return the cost of this VPWidenMemoryRecipe.
2962 VPCostContext &Ctx) const override;
2963
2965};
2966
2967/// A recipe for widening load operations, using the address to load from and an
2968/// optional mask.
2969struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2971 bool Consecutive, bool Reverse, DebugLoc DL)
2972 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2973 Reverse, DL),
2974 VPValue(this, &Load) {
2975 setMask(Mask);
2976 }
2977
2979 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2981 getDebugLoc());
2982 }
2983
2984 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2985
2986 /// Generate a wide load or gather.
2987 void execute(VPTransformState &State) override;
2988
2989#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2990 /// Print the recipe.
2991 void print(raw_ostream &O, const Twine &Indent,
2992 VPSlotTracker &SlotTracker) const override;
2993#endif
2994
2995 /// Returns true if the recipe only uses the first lane of operand \p Op.
2996 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2998 "Op must be an operand of the recipe");
2999 // Widened, consecutive load operations only demand the first lane of
3000 // their address.
3001 return Op == getAddr() && isConsecutive();
3002 }
3003};
3004
3005/// A recipe for widening load operations with vector-predication intrinsics,
3006/// using the address to load from, the explicit vector length and an optional
3007/// mask.
3008struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3010 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3011 {L.getAddr(), &EVL}, L.isConsecutive(),
3012 L.isReverse(), L.getDebugLoc()),
3013 VPValue(this, &getIngredient()) {
3014 setMask(Mask);
3015 }
3016
3017 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3018
3019 /// Return the EVL operand, which is operand 1 (right after the address).
3020 VPValue *getEVL() const { return getOperand(1); }
3021
3022 /// Generate the wide load or gather.
3023 void execute(VPTransformState &State) override;
3024
3025 /// Return the cost of this VPWidenLoadEVLRecipe.
3027 VPCostContext &Ctx) const override;
3028
3029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3030 /// Print the recipe.
3031 void print(raw_ostream &O, const Twine &Indent,
3032 VPSlotTracker &SlotTracker) const override;
3033#endif
3034
3035 /// Returns true if the recipe only uses the first lane of operand \p Op.
3036 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3038 "Op must be an operand of the recipe");
3039 // Widened loads only demand the first lane of EVL and consecutive loads
3040 // only demand the first lane of their address.
3041 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3042 }
3043};
3044
3045/// A recipe for widening store operations, using the stored value, the address
3046/// to store to and an optional mask.
3049 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
3050 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3052 setMask(Mask);
3053 }
3054
3056 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3058 Reverse, getDebugLoc());
3059 }
3060
3061 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3062
3063 /// Return the value stored by this recipe, which is operand 1.
3064 VPValue *getStoredValue() const { return getOperand(1); }
3065
3066 /// Generate a wide store or scatter.
3067 void execute(VPTransformState &State) override;
3068
3069#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3070 /// Print the recipe.
3071 void print(raw_ostream &O, const Twine &Indent,
3072 VPSlotTracker &SlotTracker) const override;
3073#endif
3074
3075 /// Returns true if the recipe only uses the first lane of operand \p Op.
3076 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3078 "Op must be an operand of the recipe");
3079 // Widened, consecutive stores only demand the first lane of their address,
3080 // unless the same operand is also stored.
3081 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3082 }
3083};
3084
3085/// A recipe for widening store operations with vector-predication intrinsics,
3086/// using the value to store, the address to store to, the explicit vector
3087/// length and an optional mask.
3090 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3091 {S.getAddr(), S.getStoredValue(), &EVL},
3092 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
3093 setMask(Mask);
3094 }
3095
3096 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3097
3098 /// Return the value stored by this recipe, which is operand 1.
3099 VPValue *getStoredValue() const { return getOperand(1); }
3100
3101 /// Return the EVL operand, which is operand 2 (after the address and the stored value).
3102 VPValue *getEVL() const { return getOperand(2); }
3103
3104 /// Generate the wide store or scatter.
3105 void execute(VPTransformState &State) override;
3106
3107 /// Return the cost of this VPWidenStoreEVLRecipe.
3109 VPCostContext &Ctx) const override;
3110
3111#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3112 /// Print the recipe.
3113 void print(raw_ostream &O, const Twine &Indent,
3114 VPSlotTracker &SlotTracker) const override;
3115#endif
3116
3117 /// Returns true if the recipe only uses the first lane of operand \p Op.
3118 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3120 "Op must be an operand of the recipe");
3121 if (Op == getEVL()) {
3122 assert(getStoredValue() != Op && "unexpected store of EVL");
3123 return true;
3124 }
3125 // Widened, consecutive memory operations only demand the first lane of
3126 // their address, unless the same operand is also stored. The latter can
3127 // happen with opaque pointers.
3128 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3129 }
3130};
3131
3132/// Recipe to expand a SCEV expression.
3134 const SCEV *Expr;
3135 ScalarEvolution &SE;
3136
3137public:
3139 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3140
3141 ~VPExpandSCEVRecipe() override = default;
3142
3144 return new VPExpandSCEVRecipe(Expr, SE);
3145 }
3146
3147 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3148
3149 /// Execute the recipe, i.e. expand the SCEV expression it holds (defined out of line).
3150 void execute(VPTransformState &State) override;
3151
3152 /// Return the cost of this VPExpandSCEVRecipe.
3154 VPCostContext &Ctx) const override {
3155 // TODO: Compute accurate cost after retiring the legacy cost model.
3156 return 0;
3157 }
3158
3159#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3160 /// Print the recipe.
3161 void print(raw_ostream &O, const Twine &Indent,
3162 VPSlotTracker &SlotTracker) const override;
3163#endif
3164
3165 const SCEV *getSCEV() const { return Expr; }
3166};
3167
3168/// Canonical scalar induction phi of the vector loop, starting at the specified
3169/// start value (either 0 or the resume value when vectorizing the epilogue
3170/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3171/// canonical induction variable.
3173public:
3175 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3176
3177 ~VPCanonicalIVPHIRecipe() override = default;
3178
3180 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3181 R->addOperand(getBackedgeValue());
3182 return R;
3183 }
3184
3185 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3186
3188 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
3189 }
3190
3191 void execute(VPTransformState &State) override {
3193 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3194 }
3195
3196#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3197 /// Print the recipe.
3198 void print(raw_ostream &O, const Twine &Indent,
3199 VPSlotTracker &SlotTracker) const override;
3200#endif
3201
3202 /// Returns the scalar type of the induction.
3204 return getStartValue()->getLiveInIRValue()->getType();
3205 }
3206
3207 /// Returns true if the recipe only uses the first lane of operand \p Op.
3208 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3210 "Op must be an operand of the recipe");
3211 return true;
3212 }
3213
3214 /// Returns true if the recipe only uses the first part of operand \p Op.
3215 bool onlyFirstPartUsed(const VPValue *Op) const override {
3217 "Op must be an operand of the recipe");
3218 return true;
3219 }
3220
3221 /// Return the cost of this VPCanonicalIVPHIRecipe.
3223 VPCostContext &Ctx) const override {
3224 // For now, match the behavior of the legacy cost model.
3225 return 0;
3226 }
3227};
3228
3229/// A recipe for generating the active lane mask for the vector loop that is
3230/// used to predicate the vector operations.
3231/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3232/// remove VPActiveLaneMaskPHIRecipe.
3234public:
3236 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3237 DL) {}
3238
3239 ~VPActiveLaneMaskPHIRecipe() override = default;
3240
3243 if (getNumOperands() == 2)
3244 R->addOperand(getOperand(1));
3245 return R;
3246 }
3247
3248 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3249
3251 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
3252 }
3253
3254 /// Generate the active lane mask phi of the vector loop.
3255 void execute(VPTransformState &State) override;
3256
3257#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3258 /// Print the recipe.
3259 void print(raw_ostream &O, const Twine &Indent,
3260 VPSlotTracker &SlotTracker) const override;
3261#endif
3262};
3263
3264/// A recipe for generating the phi node for the current index of elements,
3265/// adjusted in accordance with EVL value. It starts at the start value of the
3266/// canonical induction and gets incremented by EVL in each iteration of the
3267/// vector loop.
3269public:
3271 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3272
3273 ~VPEVLBasedIVPHIRecipe() override = default;
3274
3276 llvm_unreachable("cloning not implemented yet");
3277 }
3278
3279 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3280
3282 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
3283 }
3284
3285 void execute(VPTransformState &State) override {
3287 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3288 }
3289
3290 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3292 VPCostContext &Ctx) const override {
3293 // For now, match the behavior of the legacy cost model.
3294 return 0;
3295 }
3296
3297 /// Returns true if the recipe only uses the first lane of operand \p Op.
3298 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3300 "Op must be an operand of the recipe");
3301 return true;
3302 }
3303
3304#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3305 /// Print the recipe.
3306 void print(raw_ostream &O, const Twine &Indent,
3307 VPSlotTracker &SlotTracker) const override;
3308#endif
3309};
3310
3311/// A Recipe for widening the canonical induction variable of the vector loop.
3313 public VPUnrollPartAccessor<1> {
3314public:
3316 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3317
3318 ~VPWidenCanonicalIVRecipe() override = default;
3319
3321 return new VPWidenCanonicalIVRecipe(
3322 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3323 }
3324
3325 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3326
3327 /// Generate a canonical vector induction variable of the vector loop, with
3328 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3329 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3330 void execute(VPTransformState &State) override;
3331
3332 /// Return the cost of this VPWidenCanonicalIVRecipe.
3334 VPCostContext &Ctx) const override {
3335 // TODO: Compute accurate cost after retiring the legacy cost model.
3336 return 0;
3337 }
3338
3339#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3340 /// Print the recipe.
3341 void print(raw_ostream &O, const Twine &Indent,
3342 VPSlotTracker &SlotTracker) const override;
3343#endif
3344};
3345
3346/// A recipe for converting the input value \p IV to the corresponding
3347/// value of an IV with different start and step values, using Start + IV *
3348/// Step.
3350 /// Kind of the induction.
3352 /// If not nullptr, the floating point induction binary operator. Must be set
3353 /// for floating point inductions.
3354 const FPMathOperator *FPBinOp;
3355
3356 /// Name to use for the generated IR instruction for the derived IV.
3357 std::string Name;
3358
3359public:
3361 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3362 const Twine &Name = "")
3364 IndDesc.getKind(),
3365 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3366 Start, CanonicalIV, Step, Name) {}
3367
3369 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3370 VPValue *Step, const Twine &Name = "")
3371 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3372 FPBinOp(FPBinOp), Name(Name.str()) {}
3373
3374 ~VPDerivedIVRecipe() override = default;
3375
3377 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3378 getStepValue());
3379 }
3380
3381 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3382
3383 /// Generate the transformed value of the induction at offset StartValue (1.
3384 /// operand) + IV (2. operand) * StepValue (3. operand).
3385 void execute(VPTransformState &State) override;
3386
3387 /// Return the cost of this VPDerivedIVRecipe.
3389 VPCostContext &Ctx) const override {
3390 // TODO: Compute accurate cost after retiring the legacy cost model.
3391 return 0;
3392 }
3393
3394#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3395 /// Print the recipe.
3396 void print(raw_ostream &O, const Twine &Indent,
3397 VPSlotTracker &SlotTracker) const override;
3398#endif
3399
3401 return getStartValue()->getLiveInIRValue()->getType();
3402 }
3403
3404 VPValue *getStartValue() const { return getOperand(0); }
3405 VPValue *getStepValue() const { return getOperand(2); }
3406
3407 /// Returns true if the recipe only uses the first lane of operand \p Op.
3408 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3410 "Op must be an operand of the recipe");
3411 return true;
3412 }
3413};
3414
3415/// A recipe for handling phi nodes of integer and floating-point inductions,
3416/// producing their scalar values.
3418 public VPUnrollPartAccessor<2> {
3419 Instruction::BinaryOps InductionOpcode;
3420
3421public:
3424 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3425 ArrayRef<VPValue *>({IV, Step}), FMFs),
3426 InductionOpcode(Opcode) {}
3427
3429 VPValue *Step)
3431 IV, Step, IndDesc.getInductionOpcode(),
3432 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3433 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3434 : FastMathFlags()) {}
3435
3436 ~VPScalarIVStepsRecipe() override = default;
3437
3439 return new VPScalarIVStepsRecipe(
3440 getOperand(0), getOperand(1), InductionOpcode,
3442 }
3443
3444 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3445
3446 /// Generate the scalarized versions of the phi node as needed by their users.
3447 void execute(VPTransformState &State) override;
3448
3449 /// Return the cost of this VPScalarIVStepsRecipe.
3451 VPCostContext &Ctx) const override {
3452 // TODO: Compute accurate cost after retiring the legacy cost model.
3453 return 0;
3454 }
3455
3456#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3457 /// Print the recipe.
3458 void print(raw_ostream &O, const Twine &Indent,
3459 VPSlotTracker &SlotTracker) const override;
3460#endif
3461
3462 VPValue *getStepValue() const { return getOperand(1); }
3463
3464 /// Returns true if the recipe only uses the first lane of operand \p Op.
3465 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3467 "Op must be an operand of the recipe");
3468 return true;
3469 }
3470};
3471
3472/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3473/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3474/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3476public:
3478
3479protected:
3480 /// The VPRecipes held in the order of output instructions to generate.
3482
3483 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3484 : VPBlockBase(BlockSC, Name.str()) {}
3485
3486public:
3487 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3488 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3489 if (Recipe)
3490 appendRecipe(Recipe);
3491 }
3492
3493 ~VPBasicBlock() override {
3494 while (!Recipes.empty())
3495 Recipes.pop_back();
3496 }
3497
3498 /// Instruction iterators...
3503
3504 //===--------------------------------------------------------------------===//
3505 /// Recipe iterator methods
3506 ///
3507 inline iterator begin() { return Recipes.begin(); }
3508 inline const_iterator begin() const { return Recipes.begin(); }
3509 inline iterator end() { return Recipes.end(); }
3510 inline const_iterator end() const { return Recipes.end(); }
3511
3512 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3513 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3514 inline reverse_iterator rend() { return Recipes.rend(); }
3515 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3516
3517 inline size_t size() const { return Recipes.size(); }
3518 inline bool empty() const { return Recipes.empty(); }
3519 inline const VPRecipeBase &front() const { return Recipes.front(); }
3520 inline VPRecipeBase &front() { return Recipes.front(); }
3521 inline const VPRecipeBase &back() const { return Recipes.back(); }
3522 inline VPRecipeBase &back() { return Recipes.back(); }
3523
3524 /// Returns a reference to the list of recipes.
3526
3527 /// Returns a pointer to a member of the recipe list.
3529 return &VPBasicBlock::Recipes;
3530 }
3531
3532 /// Method to support type inquiry through isa, cast, and dyn_cast.
3533 static inline bool classof(const VPBlockBase *V) {
3534 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3535 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3536 }
3537
/// Insert \p Recipe into this block's recipe list at position \p InsertPt and
/// make this block its parent. \p Recipe must be non-null and must not already
/// have a parent block.
3538 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3539 assert(Recipe && "No recipe to append.");
3540 assert(!Recipe->Parent && "Recipe already in VPlan");
3541 Recipe->Parent = this;
3542 Recipes.insert(InsertPt, Recipe);
3543 }
3544
3545 /// Augment the existing recipes of a VPBasicBlock with an additional
3546 /// \p Recipe as the last recipe.
3547 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3548
3549 /// The method which generates the output IR instructions that correspond to
3550 /// this VPBasicBlock, thereby "executing" the VPlan.
3551 void execute(VPTransformState *State) override;
3552
3553 /// Return the cost of this VPBasicBlock.
3555
3556 /// Return the position of the first non-phi node recipe in the block.
3558
3559 /// Returns an iterator range over the PHI-like recipes in the block.
3561 return make_range(begin(), getFirstNonPhi());
3562 }
3563
3564 /// Split current block at \p SplitAt by inserting a new block between the
3565 /// current block and its successors and moving all recipes starting at
3566 /// SplitAt to the new block. Returns the new block.
3567 VPBasicBlock *splitAt(iterator SplitAt);
3568
3571
3572#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3573 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3574 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3575 ///
3576 /// Note that the numbering is applied to the whole VPlan, so printing
3577 /// individual blocks is consistent with the whole VPlan printing.
3578 void print(raw_ostream &O, const Twine &Indent,
3579 VPSlotTracker &SlotTracker) const override;
3580 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
3581#endif
3582
3583 /// If the block has multiple successors, return the branch recipe terminating
3584 /// the block. If there are no successors or only a single one, return nullptr.
3586 const VPRecipeBase *getTerminator() const;
3587
3588 /// Returns true if the block is exiting its parent region.
3589 bool isExiting() const;
3590
3591 /// Clone the current block and its recipes, without updating the operands of
3592 /// the cloned recipes.
3593 VPBasicBlock *clone() override;
3594
3595protected:
3596 /// Execute the recipes in the IR basic block \p BB.
3597 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3598
3599 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3600 /// generated for this VPBB.
3602
3603private:
3604 /// Create an IR BasicBlock to hold the output instructions generated by this
3605 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3606 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3607};
3608
3609/// A special type of VPBasicBlock that wraps an existing IR basic block.
3610/// Recipes of the block get added before the first non-phi instruction in the
3611/// wrapped block.
3612/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3613/// preheader block.
3615 BasicBlock *IRBB;
3616
3617public:
3619 : VPBasicBlock(VPIRBasicBlockSC,
3620 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3621 IRBB(IRBB) {}
3622
3623 ~VPIRBasicBlock() override {}
3624
3625 static inline bool classof(const VPBlockBase *V) {
3626 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3627 }
3628
3629 /// The method which generates the output IR instructions that correspond to
3630 /// this VPIRBasicBlock, thereby "executing" the VPlan.
3631 void execute(VPTransformState *State) override;
3632
3633 VPIRBasicBlock *clone() override;
3634
3635 BasicBlock *getIRBasicBlock() const { return IRBB; }
3636};
3637
3638/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3639/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3640/// A VPRegionBlock may indicate that its contents are to be replicated several
3641/// times. This is designed to support predicated scalarization, in which a
3642/// scalar if-then code structure needs to be generated VF * UF times. Having
3643/// this replication indicator helps to keep a single model for multiple
3644/// candidate VF's. The actual replication takes place only once the desired VF
3645/// and UF have been determined.
3647 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3648 VPBlockBase *Entry;
3649
3650 /// Hold the Single Exiting block of the SESE region modelled by the
3651 /// VPRegionBlock.
3652 VPBlockBase *Exiting;
3653
3654 /// An indicator whether this region is to generate multiple replicated
3655 /// instances of output IR corresponding to its VPBlockBases.
3656 bool IsReplicator;
3657
3658public:
3660 const std::string &Name = "", bool IsReplicator = false)
3661 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3662 IsReplicator(IsReplicator) {
3663 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3664 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3665 Entry->setParent(this);
3666 Exiting->setParent(this);
3667 }
3668 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3669 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3670 IsReplicator(IsReplicator) {}
3671
3672 ~VPRegionBlock() override {}
3673
3674 /// Method to support type inquiry through isa, cast, and dyn_cast.
3675 static inline bool classof(const VPBlockBase *V) {
3676 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3677 }
3678
3679 const VPBlockBase *getEntry() const { return Entry; }
3680 VPBlockBase *getEntry() { return Entry; }
3681
3682 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock and make
3683 /// this region its parent. \p EntryBlock must have no predecessors.
3684 void setEntry(VPBlockBase *EntryBlock) {
3685 assert(EntryBlock->getPredecessors().empty() &&
3686 "Entry block cannot have predecessors.");
3687 Entry = EntryBlock;
3688 EntryBlock->setParent(this);
3689 }
3690
3691 const VPBlockBase *getExiting() const { return Exiting; }
3692 VPBlockBase *getExiting() { return Exiting; }
3693
3694 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock and
3695 /// make this region its parent. \p ExitingBlock must have no successors.
3696 void setExiting(VPBlockBase *ExitingBlock) {
3697 assert(ExitingBlock->getSuccessors().empty() &&
3698 "Exit block cannot have successors.");
3699 Exiting = ExitingBlock;
3700 ExitingBlock->setParent(this);
3701 }
3702
3703 /// Returns the pre-header VPBasicBlock of the loop region.
3705 assert(!isReplicator() && "should only get pre-header of loop regions");
3707 }
3708
3709 /// An indicator whether this region is to generate multiple replicated
3710 /// instances of output IR corresponding to its VPBlockBases.
3711 bool isReplicator() const { return IsReplicator; }
3712
3713 /// The method which generates the output IR instructions that correspond to
3714 /// this VPRegionBlock, thereby "executing" the VPlan.
3715 void execute(VPTransformState *State) override;
3716
3717 /// Return the cost of this region.
3719
3720#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3721 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3722 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3723 /// consecutive numbers.
3724 ///
3725 /// Note that the numbering is applied to the whole VPlan, so printing
3726 /// individual regions is consistent with the whole VPlan printing.
3727 void print(raw_ostream &O, const Twine &Indent,
3728 VPSlotTracker &SlotTracker) const override;
3729 using VPBlockBase::print; // Get the print(raw_ostream &O) version.
3730#endif
3731
3732 /// Clone all blocks in the single-entry single-exit region of the block and
3733 /// their recipes without updating the operands of the cloned recipes.
3734 VPRegionBlock *clone() override;
3735};
3736
3737/// VPlan models a candidate for vectorization, encoding various decisions taken
3738/// to produce efficient output IR, including which branches, basic-blocks and
3739/// output IR instructions to generate, and their cost. VPlan holds a
3740/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3741/// VPBasicBlock.
3742class VPlan {
3743 friend class VPlanPrinter;
3744 friend class VPSlotTracker;
3745
3746 /// VPBasicBlock corresponding to the original preheader. Used to place
3747 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3748 /// rest of VPlan execution.
3749 /// When this VPlan is used for the epilogue vector loop, the entry will be
3750 /// replaced by a new entry block created during skeleton creation.
3751 VPBasicBlock *Entry;
3752
3753 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3754 VPIRBasicBlock *ScalarHeader;
3755
3756 /// Holds the VFs applicable to this VPlan.
3758
3759 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3760 /// any UF.
3762
3763 /// Holds the name of the VPlan, for printing.
3764 std::string Name;
3765
3766 /// Represents the trip count of the original loop, for folding
3767 /// the tail.
3768 VPValue *TripCount = nullptr;
3769
3770 /// Represents the backedge taken count of the original loop, for folding
3771 /// the tail. It equals TripCount - 1.
3772 VPValue *BackedgeTakenCount = nullptr;
3773
3774 /// Represents the vector trip count.
3775 VPValue VectorTripCount;
3776
3777 /// Represents the vectorization factor of the loop.
3778 VPValue VF;
3779
3780 /// Represents the loop-invariant VF * UF of the vector loop region.
3781 VPValue VFxUF;
3782
3783 /// Holds a mapping between Values and their corresponding VPValue inside
3784 /// VPlan.
3785 Value2VPValueTy Value2VPValue;
3786
3787 /// Contains all the external definitions created for this VPlan. External
3788 /// definitions are VPValues that hold a pointer to their underlying IR.
3789 SmallVector<VPValue *, 16> VPLiveInsToFree;
3790
3791 /// Mapping from SCEVs to the VPValues representing their expansions.
3792 /// NOTE: This mapping is temporary and will be removed once all users have
3793 /// been modeled in VPlan directly.
3794 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3795
3796 /// Blocks allocated and owned by the VPlan. They will be deleted once the
3797 /// VPlan is destroyed.
3798 SmallVector<VPBlockBase *> CreatedBlocks;
3799
3800 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3801 /// wrapping the original header of the scalar loop.
3802 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3803 : Entry(Entry), ScalarHeader(ScalarHeader) {
3804 Entry->setPlan(this);
3805 assert(ScalarHeader->getNumSuccessors() == 0 &&
3806 "scalar header must be a leaf node");
3807 }
3808
3809public:
3810 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
3811 /// original preheader and scalar header of \p L, to be used as entry and
3812 /// scalar header blocks of the new VPlan.
3813 VPlan(Loop *L);
3814
3815 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
3816 /// wrapping \p ScalarHeaderBB and a trip count of \p TC.
3817 VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC) {
3818 setEntry(createVPBasicBlock("preheader"));
3819 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
3820 TripCount = TC;
3821 }
3822
3823 ~VPlan();
3824
3826 Entry = VPBB;
3827 VPBB->setPlan(this);
3828 }
3829
3830 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3831 /// original scalar pre-header) which contains SCEV expansions that need
3832 /// to happen before the CFG is modified (when executing a VPlan for the
3833 /// epilogue vector loop, the original entry needs to be replaced by a new
3834 /// one); a VPBasicBlock for the vector pre-header, followed by a region for
3835 /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
3836 /// to guard executing the scalar epilogue loop, it will be added to the
3837 /// middle block, together with VPBasicBlocks for the scalar preheader and
3838 /// exit blocks. \p InductionTy is the type of the canonical induction and
3839 /// used for related values, like the trip count expression.
3840 static VPlanPtr createInitialVPlan(Type *InductionTy,
3842 bool RequiresScalarEpilogueCheck,
3843 bool TailFolded, Loop *TheLoop);
3844
3845 /// Prepare the plan for execution, setting up the required live-in values.
3846 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3847 VPTransformState &State);
3848
3849 /// Generate the IR code for this VPlan.
3850 void execute(VPTransformState *State);
3851
3852 /// Return the cost of this plan.
3854
3855 VPBasicBlock *getEntry() { return Entry; }
3856 const VPBasicBlock *getEntry() const { return Entry; }
3857
3858 /// Returns the preheader of the vector loop region, if one exists, or null
3859 /// otherwise.
3861 VPRegionBlock *VectorRegion = getVectorLoopRegion();
3862 return VectorRegion
3863 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
3864 : nullptr;
3865 }
3866
3867 /// Returns the VPRegionBlock of the vector loop.
3869 const VPRegionBlock *getVectorLoopRegion() const;
3870
3871 /// Returns the 'middle' block of the plan, that is the block that selects
3872 /// whether to execute the scalar tail loop or the exit block from the loop
3873 /// latch.
3875 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3876 }
3878 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3879 }
3880
3881 /// Return the VPBasicBlock for the preheader of the scalar loop.
3883 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
3884 }
3885
3886 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3887 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3888
3889 /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
3890 /// of the VPlan, that is leaf nodes except the scalar header. Defined in
3891 /// VPlanHCFG, as the definition of the type needs access to the definitions
3892 /// of VPBlockShallowTraversalWrapper.
3893 auto getExitBlocks();
3894
3895 /// The trip count of the original loop.
3897 assert(TripCount && "trip count needs to be set before accessing it");
3898 return TripCount;
3899 }
3900
3901 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3902 /// the original trip count have been replaced. Both the current and the new trip count must be non-null.
3903 void resetTripCount(VPValue *NewTripCount) {
3904 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3905 "Trip counts must be set and the old trip count must have no users");
3906 TripCount = NewTripCount;
3907 }
3908
3909 /// The backedge taken count of the original loop.
3911 if (!BackedgeTakenCount)
3912 BackedgeTakenCount = new VPValue();
3913 return BackedgeTakenCount;
3914 }
3915
3916 /// The vector trip count.
3917 VPValue &getVectorTripCount() { return VectorTripCount; }
3918
3919 /// Returns the VF of the vector loop region.
3920 VPValue &getVF() { return VF; };
3921
3922 /// Returns VF * UF of the vector loop region.
3923 VPValue &getVFxUF() { return VFxUF; }
3924
3925 void addVF(ElementCount VF) { VFs.insert(VF); }
3926
3928 assert(hasVF(VF) && "Cannot set VF not already in plan");
3929 VFs.clear();
3930 VFs.insert(VF);
3931 }
3932
/// Returns true if \p VF is one of the VFs recorded for this plan.
3933 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
3935 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3936 }
3937
3938 /// Returns an iterator range over all VFs of the plan.
3941 return {VFs.begin(), VFs.end()};
3942 }
3943
3944 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3945
3946 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3947
/// Returns the unroll factor of the plan. Asserts that exactly one UF is
/// recorded.
3948 unsigned getUF() const {
3949 assert(UFs.size() == 1 && "Expected a single UF");
3950 return UFs[0];
3951 }
3952
/// Restricts the plan to the single unroll factor \p UF by clearing the
/// recorded UFs and inserting \p UF. Asserts that hasUF(UF) holds beforehand,
/// i.e. that no UF is recorded yet or that \p UF is among the recorded ones.
3953 void setUF(unsigned UF) {
3954 assert(hasUF(UF) && "Cannot set the UF not already in plan");
3955 UFs.clear();
3956 UFs.insert(UF);
3957 }
3958
3959 /// Return a string with the name of the plan and the applicable VFs and UFs.
3960 std::string getName() const;
3961
3962 void setName(const Twine &newName) { Name = newName.str(); }
3963
3964 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3965 /// yet) for \p V.
3967 assert(V && "Trying to get or add the VPValue of a null Value");
3968 if (!Value2VPValue.count(V)) {
3969 VPValue *VPV = new VPValue(V);
3970 VPLiveInsToFree.push_back(VPV);
3971 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3972 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3973 Value2VPValue[V] = VPV;
3974 }
3975
3976 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3977 assert(Value2VPValue[V]->isLiveIn() &&
3978 "Only live-ins should be in mapping");
3979 return Value2VPValue[V];
3980 }
3981
3982 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3983 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3984
3985#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3986 /// Print the live-ins of this VPlan to \p O.
3987 void printLiveIns(raw_ostream &O) const;
3988
3989 /// Print this VPlan to \p O.
3990 void print(raw_ostream &O) const;
3991
3992 /// Print this VPlan in DOT format to \p O.
3993 void printDOT(raw_ostream &O) const;
3994
3995 /// Dump the plan to stderr (for debugging).
3996 LLVM_DUMP_METHOD void dump() const;
3997#endif
3998
3999 /// Returns the canonical induction recipe of the vector loop.
4002 if (EntryVPBB->empty()) {
4003 // VPlan native path.
4004 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4005 }
4006 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4007 }
4008
/// Returns the VPValue recorded as the expansion of \p S, or the map's
/// default lookup result (presumably nullptr) if no expansion was recorded.
4009 VPValue *getSCEVExpansion(const SCEV *S) const {
4010 return SCEVToExpansion.lookup(S);
4011 }
4012
/// Records \p V as the expansion of \p S. Asserts that no expansion has been
/// recorded for \p S yet.
4013 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4014 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4015 SCEVToExpansion[S] = V;
4016 }
4017
4018 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4019 /// recipes to refer to the clones, and return it.
4020 VPlan *duplicate();
4021
4022 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
4023 /// present. The returned block is owned by the VPlan and deleted once the
4024 /// VPlan is destroyed.
4026 VPRecipeBase *Recipe = nullptr) {
4027 auto *VPB = new VPBasicBlock(Name, Recipe);
4028 CreatedBlocks.push_back(VPB);
4029 return VPB;
4030 }
4031
4032 /// Create a new VPRegionBlock with \p Entry, \p Exiting and \p Name. If \p
4033 /// IsReplicator is true, the region is a replicate region. The returned block
4034 /// is owned by the VPlan and deleted once the VPlan is destroyed.
4036 const std::string &Name = "",
4037 bool IsReplicator = false) {
4038 auto *VPB = new VPRegionBlock(Entry, Exiting, Name, IsReplicator);
4039 CreatedBlocks.push_back(VPB);
4040 return VPB;
4041 }
4042
4043 /// Create a new VPRegionBlock with \p Name and entry and exiting blocks set
4044 /// to nullptr. If \p IsReplicator is true, the region is a replicate region.
4045 /// The returned block is owned by the VPlan and deleted once the VPlan is
4046 /// destroyed.
4047 VPRegionBlock *createVPRegionBlock(const std::string &Name = "",
4048 bool IsReplicator = false) {
4049 auto *VPB = new VPRegionBlock(Name, IsReplicator);
4050 CreatedBlocks.push_back(VPB);
4051 return VPB;
4052 }
4053
4054 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
4055 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
4056 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
4058
4059 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
4060 /// instructions in \p IRBB, except its terminator which is managed by the
4061 /// successors of the block in VPlan. The returned block is owned by the VPlan
4062 /// and deleted once the VPlan is destroyed.
4064};
4065
4066#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4067/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
4068/// indented and follows the dot format.
4070 raw_ostream &OS;
4071 const VPlan &Plan;
4072 unsigned Depth = 0;
4073 unsigned TabWidth = 2;
4074 std::string Indent;
4075 unsigned BID = 0;
4077
4079
4080 /// Handle indentation.
4081 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
4082
4083 /// Print a given \p Block of the Plan.
4084 void dumpBlock(const VPBlockBase *Block);
4085
4086 /// Print the information related to the CFG edges going out of a given
4087 /// \p Block, followed by printing the successor blocks themselves.
4088 void dumpEdges(const VPBlockBase *Block);
4089
4090 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
4091 /// its successor blocks.
4092 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
4093
4094 /// Print a given \p Region of the Plan.
4095 void dumpRegion(const VPRegionBlock *Region);
4096
/// Returns the ID recorded for \p Block in BlockID. If \p Block has no ID
/// yet, it is assigned the current value of BID, and BID is incremented.
4097 unsigned getOrCreateBID(const VPBlockBase *Block) {
4098 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
4099 }
4100
4101 Twine getOrCreateName(const VPBlockBase *Block);
4102
4103 Twine getUID(const VPBlockBase *Block);
4104
4105 /// Print the information related to a CFG edge between two VPBlockBases.
4106 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
4107 const Twine &Label);
4108
4109public:
4111 : OS(O), Plan(P), SlotTracker(&P) {}
4112
4113 LLVM_DUMP_METHOD void dump();
4114};
4115
4117 const Value *V;
4118
4119 VPlanIngredient(const Value *V) : V(V) {}
4120
4121 void print(raw_ostream &O) const;
4122};
4123
4125 I.print(OS);
4126 return OS;
4127}
4128
4130 Plan.print(OS);
4131 return OS;
4132}
4133#endif
4134
4135//===----------------------------------------------------------------------===//
4136// VPlan Utilities
4137//===----------------------------------------------------------------------===//
4138
4139/// Class that provides utilities for VPBlockBases in VPlan.
4141public:
4142 VPBlockUtils() = delete;
4143
4144 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
4145 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
4146 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
4147 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
4148 /// have neither successors nor predecessors.
4149 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4150 assert(NewBlock->getSuccessors().empty() &&
4151 NewBlock->getPredecessors().empty() &&
4152 "Can't insert new block with predecessors or successors.");
4153 NewBlock->setParent(BlockPtr->getParent());
// Iterate over a copy of the successors: the loop below removes each of them
// from BlockPtr via disconnectBlocks.
4154 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
4155 for (VPBlockBase *Succ : Succs) {
4156 disconnectBlocks(BlockPtr, Succ);
4157 connectBlocks(NewBlock, Succ);
4158 }
4159 connectBlocks(BlockPtr, NewBlock);
4160 }
4161
4162 /// Insert disconnected block \p NewBlock before \p BlockPtr. First
4163 /// disconnects all predecessors of \p BlockPtr and connects them to \p
4164 /// NewBlock. Add \p NewBlock as predecessor of \p BlockPtr and \p BlockPtr as
4165 /// successor of \p NewBlock.
4166 static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4167 assert(NewBlock->getSuccessors().empty() &&
4168 NewBlock->getPredecessors().empty() &&
4169 "Can't insert new block with predecessors or successors.");
4170 NewBlock->setParent(BlockPtr->getParent());
// to_vector takes a copy of the predecessors: the loop body removes each of
// them from BlockPtr via disconnectBlocks.
4171 for (VPBlockBase *Pred : to_vector(BlockPtr->predecessors())) {
4172 disconnectBlocks(Pred, BlockPtr);
4173 connectBlocks(Pred, NewBlock);
4174 }
4175 connectBlocks(NewBlock, BlockPtr);
4176 }
4177
4178 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
4179 /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
4180 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
4181 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
4182 /// and \p IfTrue and \p IfFalse must have neither successors nor
4183 /// predecessors.
4184 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
4185 VPBlockBase *BlockPtr) {
// NOTE(review): only the "no successors" precondition on IfTrue/IfFalse is
// asserted here; the remaining documented preconditions are presumably
// checked by setTwoSuccessors/setPredecessors -- confirm.
4186 assert(IfTrue->getSuccessors().empty() &&
4187 "Can't insert IfTrue with successors.");
4188 assert(IfFalse->getSuccessors().empty() &&
4189 "Can't insert IfFalse with successors.");
4190 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
4191 IfTrue->setPredecessors({BlockPtr});
4192 IfFalse->setPredecessors({BlockPtr});
4193 IfTrue->setParent(BlockPtr->getParent());
4194 IfFalse->setParent(BlockPtr->getParent());
4195 }
4196
4197 /// Connect VPBlockBases \p From and \p To bi-directionally. If \p PredIdx is
4198 /// -1, append \p From to the predecessors of \p To, otherwise set \p To's
4199 /// predecessor at \p PredIdx to \p From. If \p SuccIdx is -1, append \p To to
4200 /// the successors of \p From, otherwise set \p From's successor at \p SuccIdx
4201 /// to \p To. Both VPBlockBases must have the same parent, which can be null.
4202 /// Both VPBlockBases can be already connected to other VPBlockBases.
4204 unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
4205 assert((From->getParent() == To->getParent()) &&
4206 "Can't connect two block with different parents");
4207 assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
4208 "Blocks can't have more than two successors.");
4209 if (SuccIdx == -1u)
4210 From->appendSuccessor(To);
4211 else
4212 From->getSuccessors()[SuccIdx] = To;
4213
4214 if (PredIdx == -1u)
4215 To->appendPredecessor(From);
4216 else
4217 To->getPredecessors()[PredIdx] = From;
4218 }
4219
4220 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
4221 /// from the successors of \p From and \p From from the predecessors of \p To.
4223 assert(To && "Successor to disconnect is null.");
4224 From->removeSuccessor(To);
4225 To->removePredecessor(From);
4226 }
4227
4228 /// Reassociate all the blocks connected to \p Old so that they now point to
4229 /// \p New.
4231 for (auto *Pred : to_vector(Old->getPredecessors()))
4232 Pred->replaceSuccessor(Old, New);
4233 for (auto *Succ : to_vector(Old->getSuccessors()))
4234 Succ->replacePredecessor(Old, New);
4235 New->setPredecessors(Old->getPredecessors());
4236 New->setSuccessors(Old->getSuccessors());
4237 Old->clearPredecessors();
4238 Old->clearSuccessors();
4239 }
4240
4241 /// Return an iterator range over \p Range which only includes \p BlockTy
4242 /// blocks. The accesses are casted to \p BlockTy.
4243 template <typename BlockTy, typename T>
4244 static auto blocksOnly(const T &Range) {
4245 // Create BaseTy with correct const-ness based on BlockTy.
4246 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
4247 const VPBlockBase, VPBlockBase>;
4248
4249 // We need to first create an iterator range over (const) BlockTy & instead
4250 // of (const) BlockTy * for filter_range to work properly.
4251 auto Mapped =
4252 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
4254 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
4255 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
4256 return cast<BlockTy>(&Block);
4257 });
4258 }
4259
4260 /// Inserts \p BlockPtr on the edge between \p From and \p To. That is, update
4261 /// \p From's successor to \p To to point to \p BlockPtr and \p To's
4262 /// predecessor from \p From to \p BlockPtr. \p From and \p To are added to \p
4263 /// BlockPtr's predecessors and successors respectively. There must be a
4264 /// single edge between \p From and \p To.
4266 VPBlockBase *BlockPtr) {
4267 auto &Successors = From->getSuccessors();
4268 auto &Predecessors = To->getPredecessors();
4269 assert(count(Successors, To) == 1 && count(Predecessors, From) == 1 &&
4270 "must have single between From and To");
4271 unsigned SuccIdx = std::distance(Successors.begin(), find(Successors, To));
4272 unsigned PredIx =
4273 std::distance(Predecessors.begin(), find(Predecessors, From));
4274 VPBlockUtils::connectBlocks(From, BlockPtr, -1, SuccIdx);
4275 VPBlockUtils::connectBlocks(BlockPtr, To, PredIx, -1);
4276 }
4277};
4278
4281 InterleaveGroupMap;
4282
4283 /// Type for mapping of instruction based interleave groups to VPInstruction
4284 /// interleave groups
4287
4288 /// Recursively traverse \p Region and populate VPlan based interleave groups based on
4289 /// \p IAI.
4290 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
4292 /// Recursively traverse \p Block and populate VPlan based interleave groups
4293 /// based on \p IAI.
4294 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
4296
4297public:
4299
4302 // Avoid releasing a pointer twice.
4303 for (auto &I : InterleaveGroupMap)
4304 DelSet.insert(I.second);
4305 for (auto *Ptr : DelSet)
4306 delete Ptr;
4307 }
4308
4309 /// Get the interleave group that \p Instr belongs to.
4310 ///
4311 /// \returns nullptr if doesn't have such group.
4314 return InterleaveGroupMap.lookup(Instr);
4315 }
4316};
4317
4318/// Class that maps (parts of) an existing VPlan to trees of combined
4319/// VPInstructions.
4321 enum class OpMode { Failed, Load, Opcode };
4322
4323 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
4324 /// DenseMap keys.
4325 struct BundleDenseMapInfo {
4326 static SmallVector<VPValue *, 4> getEmptyKey() {
4327 return {reinterpret_cast<VPValue *>(-1)};
4328 }
4329
4330 static SmallVector<VPValue *, 4> getTombstoneKey() {
4331 return {reinterpret_cast<VPValue *>(-2)};
4332 }
4333
4334 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
4335 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
4336 }
4337
4338 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
4340 return LHS == RHS;
4341 }
4342 };
4343
4344 /// Mapping of values in the original VPlan to a combined VPInstruction.
4346 BundleToCombined;
4347
4349
4350 /// Basic block to operate on. For now, only instructions in a single BB are
4351 /// considered.
4352 const VPBasicBlock &BB;
4353
4354 /// Indicates whether we managed to combine all visited instructions or not.
4355 bool CompletelySLP = true;
4356
4357 /// Width of the widest combined bundle in bits.
4358 unsigned WidestBundleBits = 0;
4359
4360 using MultiNodeOpTy =
4361 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
4362
4363 // Input operand bundles for the current multi node. Each multi node operand
4364 // bundle contains values not matching the multi node's opcode. They will
4365 // be reordered in reorderMultiNodeOps, once we completed building a
4366 // multi node.
4367 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
4368
4369 /// Indicates whether we are building a multi node currently.
4370 bool MultiNodeActive = false;
4371
4372 /// Check if we can vectorize Operands together.
4373 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
4374
4375 /// Add combined instruction \p New for the bundle \p Operands.
4376 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
4377
4378 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
4379 VPInstruction *markFailed();
4380
4381 /// Reorder operands in the multi node to maximize sequential memory access
4382 /// and commutative operations.
4383 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
4384
4385 /// Choose the best candidate to use for the lane after \p Last. The set of
4386 /// candidates to choose from are values with an opcode matching \p Last's
4387 /// or loads consecutive to \p Last.
4388 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
4389 SmallPtrSetImpl<VPValue *> &Candidates,
4391
4392#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4393 /// Print bundle \p Values to dbgs().
4394 void dumpBundle(ArrayRef<VPValue *> Values);
4395#endif
4396
4397public:
4398 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
4399
4400 ~VPlanSlp() = default;
4401
4402 /// Tries to build an SLP tree rooted at \p Operands and returns a
4403 /// VPInstruction combining \p Operands, if they can be combined.
4405
4406 /// Return the width of the widest combined bundle in bits.
4407 unsigned getWidestBundleBits() const { return WidestBundleBits; }
4408
4409 /// Return true if all visited instructions can be combined.
4410 bool isCompletelySLP() const { return CompletelySLP; }
4411};
4412} // end namespace llvm
4413
4414#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:410
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1315
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:819
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:108
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:480
uint32_t getFactor() const
Definition: VectorUtils.h:496
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:550
InstTy * getInsertPos() const
Definition: VectorUtils.h:566
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:622
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition: ModRef.h:198
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:195
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:121
ElementCount operator*() const
Definition: VPlan.h:129
iterator & operator++()
Definition: VPlan.h:131
iterator(ElementCount VF)
Definition: VPlan.h:125
bool operator==(const iterator &Other) const
Definition: VPlan.h:127
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3233
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3241
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3250
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:3235
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3475
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3500
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3547
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3502
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3499
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect the VPBB's predecessors in the VPlan CFG to the IR basic block generated for this VPBB.
Definition: VPlan.cpp:415
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:480
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3525
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:3483
iterator end()
Definition: VPlan.h:3509
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:3487
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3507
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3501
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:517
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3560
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:758
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:208
~VPBasicBlock() override
Definition: VPlan.h:3493
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:566
const_reverse_iterator rbegin() const
Definition: VPlan.h:3513
reverse_iterator rend()
Definition: VPlan.h:3514
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:536
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:3481
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:524
VPRecipeBase & back()
Definition: VPlan.h:3522
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:631
const VPRecipeBase & front() const
Definition: VPlan.h:3519
const_iterator begin() const
Definition: VPlan.h:3508
VPRecipeBase & front()
Definition: VPlan.h:3520
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:614
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:602
const VPRecipeBase & back() const
Definition: VPlan.h:3521
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3538
bool empty() const
Definition: VPlan.h:3518
const_iterator end() const
Definition: VPlan.h:3510
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3533
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3528
reverse_iterator rbegin()
Definition: VPlan.h:3512
size_t size() const
Definition: VPlan.h:3517
const_reverse_iterator rend() const
Definition: VPlan.h:3515
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2433
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2439
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBlendRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2486
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2462
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2467
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2457
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2444
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2453
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:397
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:613
VPRegionBlock * getParent()
Definition: VPlan.h:489
VPBlocksTy & getPredecessors()
Definition: VPlan.h:521
iterator_range< VPBlockBase ** > predecessors()
Definition: VPlan.h:518
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:672
void setName(const Twine &newName)
Definition: VPlan.h:482
size_t getNumSuccessors() const
Definition: VPlan.h:535
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:517
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:628
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:619
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:641
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:662
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:571
size_t getNumPredecessors() const
Definition: VPlan.h:536
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:604
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:200
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:520
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:474
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:153
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:172
const VPRegionBlock * getParent() const
Definition: VPlan.h:490
const std::string & getName() const
Definition: VPlan.h:480
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:623
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:561
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:595
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:531
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:555
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:620
unsigned getVPBlockID() const
Definition: VPlan.h:487
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:648
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:466
VPBlocksTy & getSuccessors()
Definition: VPlan.h:515
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:192
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:584
void setParent(VPRegionBlock *P)
Definition: VPlan.h:500
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:577
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:525
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:514
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:4140
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:4244
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:4149
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition: VPlan.h:4265
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:4184
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4203
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4222
static void reassociateBlocks(VPBlockBase *Old, VPBlockBase *New)
Reassociate all the blocks connected to Old so that they now point to New.
Definition: VPlan.h:4230
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before BlockPtr.
Definition: VPlan.h:4166
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2793
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2829
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2817
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2795
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2801
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2836
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3172
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:3215
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3187
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3179
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:3174
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3208
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3203
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3191
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition: VPlan.h:3222
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
unsigned getVPDefID() const
Definition: VPlanValue.h:419
A recipe for converting the input IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3349
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1st operand) + IV (2nd operand) * StepValue (3rd operand).
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition: VPlan.h:3388
VPValue * getStepValue() const
Definition: VPlan.h:3405
Type * getScalarType() const
Definition: VPlan.h:3400
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3376
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3368
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3408
VPValue * getStartValue() const
Definition: VPlan.h:3404
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3360
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3268
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3281
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3275
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3285
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition: VPlan.h:3291
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:3270
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3298
Recipe to expand a SCEV expression.
Definition: VPlan.h:3133
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:3138
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition: VPlan.h:3153
const SCEV * getSCEV() const
Definition: VPlan.h:3165
void execute(VPTransformState &State) override
Generate the IR that expands the SCEV expression.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3143
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:2022
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
static bool classof(const VPValue *V)
Definition: VPlan.h:2039
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:2024
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2070
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2059
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:2067
VPValue * getStartValue() const
Definition: VPlan.h:2062
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2035
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:2076
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition: VPlan.h:1771
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1783
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPHistogramRecipe(unsigned Opcode, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:1777
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1800
unsigned getOpcode() const
Definition: VPlan.h:1796
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3614
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:451
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:3618
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3635
~VPIRBasicBlock() override
Definition: VPlan.h:3623
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3625
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.cpp:473
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
Definition: VPlan.h:1376
Instruction & getInstruction() const
Definition: VPlan.h:1400
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition: VPlan.h:1414
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1387
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition: VPlan.h:1408
VPIRInstruction(Instruction &I)
Definition: VPlan.h:1380
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1191
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1291
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1266
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1302
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1209
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1197
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1212
@ CalculateTripCountMinusVF
Definition: VPlan.h:1210
bool hasResult() const
Definition: VPlan.h:1332
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1369
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition: VPlan.h:1317
unsigned getOpcode() const
Definition: VPlan.h:1309
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1278
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1271
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1283
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2500
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2583
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2541
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2512
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2547
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2533
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2554
Instruction * getInsertPos() const
Definition: VPlan.h:2589
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2574
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2578
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:4313
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:229
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:73
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:176
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:210
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:213
VPLane(unsigned Lane)
Definition: VPlan.h:175
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:200
static VPLane getFirstLane()
Definition: VPlan.h:178
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:156
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:216
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2848
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2880
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2856
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition: VPlan.h:2867
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2852
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:714
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:803
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:739
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:808
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:781
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:725
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:740
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:786
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:730
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:792
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:924
ExactFlagsTy ExactFlags
Definition: VPlan.h:974
FastMathFlagsTy FMFs
Definition: VPlan.h:977
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:976
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:971
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1144
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1026
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1105
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPNoWrapFlags GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1051
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1057
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1038
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1074
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1147
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:996
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:973
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1032
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1044
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:975
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:982
WrapFlagsTy WrapFlags
Definition: VPlan.h:972
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1151
bool isDisjoint() const
Definition: VPlan.h:1163
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1138
bool hasNoSignedWrap() const
Definition: VPlan.h:1157
static bool classof(const VPUser *U)
Definition: VPlan.h:1068
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:989
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2675
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2705
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2702
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
Definition: VPlan.h:2677
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2686
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:2374
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:2387
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2425
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2397
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2428
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2407
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2420
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2595
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2660
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2630
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered, DebugLoc DL={})
Definition: VPlan.h:2615
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered, DebugLoc DL)
Definition: VPlan.h:2603
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2664
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2654
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2666
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2658
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2662
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2624
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2635
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3646
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:702
const VPBlockBase * getEntry() const
Definition: VPlan.h:3679
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3711
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3696
VPBlockBase * getExiting()
Definition: VPlan.h:3692
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3684
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:765
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:803
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3668
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3659
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:711
const VPBlockBase * getExiting() const
Definition: VPlan.h:3691
VPBlockBase * getEntry()
Definition: VPlan.h:3680
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3704
~VPRegionBlock() override
Definition: VPlan.h:3672
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3675
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2716
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2765
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2772
bool isUniform() const
Definition: VPlan.h:2760
bool isPredicated() const
Definition: VPlan.h:2762
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2735
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2725
unsigned getOpcode() const
Definition: VPlan.h:2789
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2784
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
A recipe to compute the pointers for widened memory accesses of IndexedTy in reverse order.
Definition: VPlan.h:1899
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReverseVectorPointerRecipe.
Definition: VPlan.h:1923
VPReverseVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1937
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1930
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1916
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1903
const VPValue * getVFValue() const
Definition: VPlan.h:1912
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1575
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarCastRecipe.
Definition: VPlan.h:1600
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1590
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1614
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL)
Definition: VPlan.h:1583
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1612
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3418
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3465
VPValue * getStepValue() const
Definition: VPlan.h:3462
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition: VPlan.h:3450
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:3428
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3438
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:3422
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Recipe to generate a scalar PHI.
Definition: VPlan.h:2258
VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, StringRef Name)
Definition: VPlan.h:2262
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2281
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPScalarPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPScalarPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2271
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:841
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:847
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:910
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:856
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:913
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:844
static bool classof(const VPUser *U)
Definition: VPlan.h:902
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:852
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:440
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:1176
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
operand_range operands()
Definition: VPlanValue.h:257
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:242
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_end()
Definition: VPlanValue.h:255
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:231
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1417
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:132
A recipe to compute the pointers for widened memory accesses of IndexedTy.
Definition: VPlan.h:1952
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1956
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1973
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1966
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition: VPlan.h:1986
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1980
A recipe for widening Call instructions using library calls.
Definition: VPlan.h:1715
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1755
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1734
Function * getCalledScalarFunction() const
Definition: VPlan.h:1748
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1752
~VPWidenCallRecipe() override=default
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL={})
Definition: VPlan.h:1722
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3313
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition: VPlan.h:3333
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3320
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:3315
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1523
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1531
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1568
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1571
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1539
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1545
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1476
const VPValue * getEVL() const
Definition: VPlan.h:1500
~VPWidenEVLRecipe() override=default
VPWidenEVLRecipe(Instruction &I, iterator_range< IterT > Operands, VPValue &EVL)
Definition: VPlan.h:1481
VPWidenRecipe * clone() override final
Clone the current recipe.
Definition: VPlan.h:1492
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC)
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
Definition: VPlan.h:1485
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1499
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1507
A recipe for handling GEP instructions.
Definition: VPlan.h:1850
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition: VPlan.h:1883
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1872
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1867
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2084
static bool classof(const VPValue *V)
Definition: VPlan.h:2100
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2120
PHINode * getPHINode() const
Definition: VPlan.h:2115
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2088
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2112
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2118
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:2127
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2095
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2105
const VPValue * getStepValue() const
Definition: VPlan.h:2113
virtual void execute(VPTransformState &State) override=0
Generate the phi nodes.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2137
const TruncInst * getTruncInst() const
Definition: VPlan.h:2191
const VPValue * getVFValue() const
Definition: VPlan.h:2180
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition: VPlan.h:2150
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2161
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2190
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2141
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition: VPlan.h:2206
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2199
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition: VPlan.h:1623
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, std::initializer_list< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1664
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition: VPlan.h:1688
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition: VPlan.h:1697
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1649
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition: VPlan.h:1703
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1672
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition: VPlan.h:1700
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1691
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1640
A common base class for widening memory operations.
Definition: VPlan.h:2889
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2900
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2897
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2936
static bool classof(const VPUser *U)
Definition: VPlan.h:2930
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2956
Instruction & Ingredient
Definition: VPlan.h:2891
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2919
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition: VPlan.h:2964
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2894
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2923
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2910
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2950
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2946
void setMask(VPValue *Mask)
Definition: VPlan.h:2902
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2943
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2940
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:2297
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:2327
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:2336
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:2303
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2309
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:2333
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2227
~VPWidenPointerInductionRecipe() override=default
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:2218
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2244
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1425
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1441
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1436
unsigned getOpcode() const
Definition: VPlan.h:1465
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1430
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1447
static bool classof(const VPUser *U)
Definition: VPlan.h:1452
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:4069
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:4110
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1274
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:4320
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instructions can be combined.
Definition: VPlan.h:4410
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:4398
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:4407
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3742
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1145
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1121
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:924
bool hasScalableVF()
Definition: VPlan.h:3934
VPBasicBlock * getEntry()
Definition: VPlan.h:3855
VPRegionBlock * createVPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Entry, Exiting and Name.
Definition: VPlan.h:4035
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3917
void setName(const Twine &newName)
Definition: VPlan.h:3962
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3923
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3920
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3896
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3910
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3940
VPlan(BasicBlock *ScalarHeaderBB, VPValue *TC)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and a tr...
Definition: VPlan.h:3817
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3856
unsigned getUF() const
Definition: VPlan.h:3948
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
Definition: VPlan.cpp:845
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition: VPlan.cpp:1246
bool hasVF(ElementCount VF)
Definition: VPlan.h:3933
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:4013
bool hasUF(unsigned UF) const
Definition: VPlan.h:3946
void setVF(ElementCount VF)
Definition: VPlan.h:3927
VPRegionBlock * createVPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Create a new VPRegionBlock with Name and entry and exiting blocks set to nullptr.
Definition: VPlan.h:4047
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:309
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1052
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1046
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3874
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3903
VPBasicBlock * getMiddleBlock()
Definition: VPlan.h:3877
void setEntry(VPBasicBlock *VPBB)
Definition: VPlan.h:3825
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition: VPlan.h:4025
VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:1252
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3966
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1151
bool hasScalarVFOnly() const
Definition: VPlan.h:3944
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:3882
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:956
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4000
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1104
void addVF(ElementCount VF)
Definition: VPlan.h:3925
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:3887
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3983
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:4009
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1068
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition: VPlan.h:3860
void setUF(unsigned UF)
Definition: VPlan.h:3953
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1192
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:144
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator...
Definition: STLExtras.h:573
@ Other
Any other memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:92
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:97
iterator end()
Definition: VPlan.h:138
const ElementCount Start
Definition: VPlan.h:99
ElementCount End
Definition: VPlan.h:102
iterator begin()
Definition: VPlan.h:137
bool isEmpty() const
Definition: VPlan.h:104
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:108
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:682
LLVMContext & LLVMCtx
Definition: VPlan.h:686
LoopVectorizationCostModel & CM
Definition: VPlan.h:687
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1665
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed, e.g. because it has already been pre-computed.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of all recipes migrates to VPlan.
VPTypeAnalysis Types
Definition: VPlan.h:685
const TargetLibraryInfo & TLI
Definition: VPlan.h:684
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, Type *CanIVTy, LoopVectorizationCostModel &CM)
Definition: VPlan.h:690
const TargetTransformInfo & TTI
Definition: VPlan.h:683
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:688
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2342
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2352
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:2343
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2348
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:941
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
Definition: VPlan.h:338
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:344
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:352
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:340
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:348
CFGState(DominatorTree *DT)
Definition: VPlan.h:357
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:348
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:355
DenseMap< VPValue *, Value * > VPV2Vector
Definition: VPlan.h:255
DenseMap< VPValue *, SmallVector< Value *, 4 > > VPV2Scalars
Definition: VPlan.h:257
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the output IR.
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:269
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:267
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:366
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:389
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:392
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:361
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:394
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the generated scalar.
Definition: VPlan.cpp:249
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:385
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:353
void reset(VPValue *Def, Value *V, const VPLane &Lane)
Reset an existing scalar value for Def and a given Lane.
Definition: VPlan.h:306
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:250
void set(VPValue *Def, Value *V, const VPLane &Lane)
Set the generated scalar V for Def and the given Lane.
Definition: VPlan.h:296
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:369
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:242
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:375
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:372
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def.
Definition: VPlan.h:290
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:245
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:372
Loop * CurrentParentLoop
The parent loop object for the current scope, or nullptr.
Definition: VPlan.h:378
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:279
A recipe for widening load operations with vector-predication intrinsics, using the address to load from, the explicit vector length and an optional mask.
Definition: VPlan.h:3008
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3020
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3036
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3009
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2969
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2970
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2996
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2978
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1812
bool isInvariantCond() const
Definition: VPlan.h:1844
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1820
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1814
VPValue * getCond() const
Definition: VPlan.h:1840
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store, the address to store to, the explicit vector length and an optional mask.
Definition: VPlan.h:3088
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3099
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3118
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3089
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3102
A recipe for widening store operations, using the stored value, the address to store to and an optional mask.
Definition: VPlan.h:3047
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3076
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3048
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3064
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3055
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:4119
const Value * V
Definition: VPlan.h:4117
void print(raw_ostream &O) const
Definition: VPlan.cpp:1390