LLVM 20.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// 6. The VPlanPrinter class providing a way to print a plan in dot format;
21/// These are documented in docs/VectorizationPlan.rst.
22//
23//===----------------------------------------------------------------------===//
24
25#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
27
28#include "VPlanAnalysis.h"
29#include "VPlanValue.h"
30#include "llvm/ADT/DenseMap.h"
34#include "llvm/ADT/Twine.h"
35#include "llvm/ADT/ilist.h"
36#include "llvm/ADT/ilist_node.h"
42#include "llvm/IR/DebugLoc.h"
43#include "llvm/IR/FMF.h"
44#include "llvm/IR/Operator.h"
46#include <algorithm>
47#include <cassert>
48#include <cstddef>
49#include <string>
50
51namespace llvm {
52
53class BasicBlock;
54class DominatorTree;
55class InnerLoopVectorizer;
56class IRBuilderBase;
57class LoopInfo;
58class raw_ostream;
59class RecurrenceDescriptor;
60class SCEV;
61class Type;
62class VPBasicBlock;
63class VPRegionBlock;
64class VPlan;
65class VPReplicateRecipe;
66class VPlanSlp;
67class Value;
68class LoopVectorizationCostModel;
69class LoopVersioning;
70
71struct VPCostContext;
72
73namespace Intrinsic {
74typedef unsigned ID;
75}
76
77/// Returns a calculation for the total number of elements for a given \p VF.
78/// For fixed width vectors this value is a constant, whereas for scalable
79/// vectors it is an expression determined at runtime.
80Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
81
82/// Return a value for Step multiplied by VF.
83Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
84 int64_t Step);
85
/// Returns the reciprocal of the probability with which a predicated block is
/// assumed to execute: a result of X means the block is taken to run once for
/// every X iterations of the loop header.
///
/// TODO: Use the actual block probability when it is available. For now every
/// predicated block is assumed to execute 50% of the time.
inline unsigned getReciprocalPredBlockProb() {
  // A fixed 50% probability corresponds to a reciprocal of 2.
  constexpr unsigned ReciprocalProb = 2;
  return ReciprocalProb;
}
93
94/// A range of powers-of-2 vectorization factors with fixed start and
95/// adjustable end. The range includes start and excludes end, e.g.,:
96/// [1, 16) = {1, 2, 4, 8}
97struct VFRange {
98 // A power of 2.
100
101 // A power of 2. If End <= Start range is empty.
103
104 bool isEmpty() const {
106 }
107
109 : Start(Start), End(End) {
111 "Both Start and End should have the same scalable flag");
113 "Expected Start to be a power of 2");
115 "Expected End to be a power of 2");
116 }
117
118 /// Iterator to iterate over vectorization factors in a VFRange.
120 : public iterator_facade_base<iterator, std::forward_iterator_tag,
121 ElementCount> {
122 ElementCount VF;
123
124 public:
125 iterator(ElementCount VF) : VF(VF) {}
126
127 bool operator==(const iterator &Other) const { return VF == Other.VF; }
128
129 ElementCount operator*() const { return VF; }
130
132 VF *= 2;
133 return *this;
134 }
135 };
136
140 return iterator(End);
141 }
142};
143
144using VPlanPtr = std::unique_ptr<VPlan>;
145
146/// In what follows, the term "input IR" refers to code that is fed into the
147/// vectorizer whereas the term "output IR" refers to code that is generated by
148/// the vectorizer.
149
150/// VPLane provides a way to access lanes in both fixed width and scalable
151/// vectors, where for the latter the lane index sometimes needs calculating
152/// as a runtime expression.
153class VPLane {
154public:
155 /// Kind describes how to interpret Lane.
156 enum class Kind : uint8_t {
157 /// For First, Lane is the index into the first N elements of a
158 /// fixed-vector <N x <ElTy>> or a scalable vector <vscale x N x <ElTy>>.
159 First,
160 /// For ScalableLast, Lane is the offset from the start of the last
161 /// N-element subvector in a scalable vector <vscale x N x <ElTy>>. For
162 /// example, a Lane of 0 corresponds to lane `(vscale - 1) * N`, a Lane of
163 /// 1 corresponds to `((vscale - 1) * N) + 1`, etc.
165 };
166
167private:
168 /// in [0..VF)
169 unsigned Lane;
170
171 /// Indicates how the Lane should be interpreted, as described above.
172 Kind LaneKind;
173
174public:
175 VPLane(unsigned Lane) : Lane(Lane), LaneKind(VPLane::Kind::First) {}
176 VPLane(unsigned Lane, Kind LaneKind) : Lane(Lane), LaneKind(LaneKind) {}
177
179
180 static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset) {
181 assert(Offset > 0 && Offset <= VF.getKnownMinValue() &&
182 "trying to extract with invalid offset");
183 unsigned LaneOffset = VF.getKnownMinValue() - Offset;
184 Kind LaneKind;
185 if (VF.isScalable())
186 // In this case 'LaneOffset' refers to the offset from the start of the
187 // last subvector with VF.getKnownMinValue() elements.
189 else
190 LaneKind = VPLane::Kind::First;
191 return VPLane(LaneOffset, LaneKind);
192 }
193
195 return getLaneFromEnd(VF, 1);
196 }
197
198 /// Returns a compile-time known value for the lane index and asserts if the
199 /// lane can only be calculated at runtime.
200 unsigned getKnownLane() const {
201 assert(LaneKind == Kind::First);
202 return Lane;
203 }
204
205 /// Returns an expression describing the lane index that can be used at
206 /// runtime.
207 Value *getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const;
208
209 /// Returns the Kind of lane offset.
210 Kind getKind() const { return LaneKind; }
211
212 /// Returns true if this is the first lane of the whole vector.
213 bool isFirstLane() const { return Lane == 0 && LaneKind == Kind::First; }
214
215 /// Maps the lane to a cache index based on \p VF.
216 unsigned mapToCacheIndex(const ElementCount &VF) const {
217 switch (LaneKind) {
219 assert(VF.isScalable() && Lane < VF.getKnownMinValue());
220 return VF.getKnownMinValue() + Lane;
221 default:
222 assert(Lane < VF.getKnownMinValue());
223 return Lane;
224 }
225 }
226
227 /// Returns the maxmimum number of lanes that we are able to consider
228 /// caching for \p VF.
229 static unsigned getNumCachedLanes(const ElementCount &VF) {
230 return VF.getKnownMinValue() * (VF.isScalable() ? 2 : 1);
231 }
232};
233
234/// VPTransformState holds information passed down when "executing" a VPlan,
235/// needed for generating the output IR.
239 InnerLoopVectorizer *ILV, VPlan *Plan, Type *CanonicalIVTy);
240 /// Target Transform Info.
242
243 /// The chosen Vectorization Factor of the loop being vectorized.
245
246 /// Hold the index to generate specific scalar instructions. Null indicates
247 /// that all instances are to be generated, using either scalar or vector
248 /// instructions.
249 std::optional<VPLane> Lane;
250
251 struct DataState {
252 // Each value from the original loop, when vectorized, is represented by a
253 // vector value in the map.
255
258
259 /// Get the generated vector Value for a given VPValue \p Def if \p IsScalar
260 /// is false, otherwise return the generated scalar. \see set.
261 Value *get(VPValue *Def, bool IsScalar = false);
262
263 /// Get the generated Value for a given VPValue \p Def and the given \p Lane.
264 Value *get(VPValue *Def, const VPLane &Lane);
265
266 bool hasVectorValue(VPValue *Def) { return Data.VPV2Vector.contains(Def); }
267
269 auto I = Data.VPV2Scalars.find(Def);
270 if (I == Data.VPV2Scalars.end())
271 return false;
272 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
273 return CacheIdx < I->second.size() && I->second[CacheIdx];
274 }
275
276 /// Set the generated vector Value for a given VPValue, if \p
277 /// IsScalar is false. If \p IsScalar is true, set the scalar in lane 0.
278 void set(VPValue *Def, Value *V, bool IsScalar = false) {
279 if (IsScalar) {
280 set(Def, V, VPLane(0));
281 return;
282 }
283 assert((VF.isScalar() || V->getType()->isVectorTy()) &&
284 "scalar values must be stored as (0, 0)");
285 Data.VPV2Vector[Def] = V;
286 }
287
288 /// Reset an existing vector value for \p Def and a given \p Part.
289 void reset(VPValue *Def, Value *V) {
290 assert(Data.VPV2Vector.contains(Def) && "need to overwrite existing value");
291 Data.VPV2Vector[Def] = V;
292 }
293
294 /// Set the generated scalar \p V for \p Def and the given \p Lane.
295 void set(VPValue *Def, Value *V, const VPLane &Lane) {
296 auto &Scalars = Data.VPV2Scalars[Def];
297 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
298 if (Scalars.size() <= CacheIdx)
299 Scalars.resize(CacheIdx + 1);
300 assert(!Scalars[CacheIdx] && "should overwrite existing value");
301 Scalars[CacheIdx] = V;
302 }
303
304 /// Reset an existing scalar value for \p Def and a given \p Lane.
305 void reset(VPValue *Def, Value *V, const VPLane &Lane) {
306 auto Iter = Data.VPV2Scalars.find(Def);
307 assert(Iter != Data.VPV2Scalars.end() &&
308 "need to overwrite existing value");
309 unsigned CacheIdx = Lane.mapToCacheIndex(VF);
310 assert(CacheIdx < Iter->second.size() &&
311 "need to overwrite existing value");
312 Iter->second[CacheIdx] = V;
313 }
314
315 /// Add additional metadata to \p To that was not present on \p Orig.
316 ///
317 /// Currently this is used to add the noalias annotations based on the
318 /// inserted memchecks. Use this for instructions that are *cloned* into the
319 /// vector loop.
320 void addNewMetadata(Instruction *To, const Instruction *Orig);
321
322 /// Add metadata from one instruction to another.
323 ///
324 /// This includes both the original MDs from \p From and additional ones (\see
325 /// addNewMetadata). Use this for *newly created* instructions in the vector
326 /// loop.
327 void addMetadata(Value *To, Instruction *From);
328
329 /// Set the debug location in the builder using the debug location \p DL.
331
332 /// Construct the vector value of a scalarized value \p Def one lane at a time.
333 void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane);
334
335 /// Hold state information used when constructing the CFG of the output IR,
336 /// traversing the VPBasicBlocks and generating corresponding IR BasicBlocks.
337 struct CFGState {
338 /// The previous VPBasicBlock visited. Initially set to null.
340
341 /// The previous IR BasicBlock created or used. Initially set to the new
342 /// header BasicBlock.
343 BasicBlock *PrevBB = nullptr;
344
345 /// The last IR BasicBlock in the output IR. Set to the exit block of the
346 /// vector loop.
347 BasicBlock *ExitBB = nullptr;
348
349 /// A mapping of each VPBasicBlock to the corresponding BasicBlock. In case
350 /// of replication, maps the BasicBlock of the last replica created.
352
353 /// Updater for the DominatorTree.
355
357 : DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy) {}
358
359 /// Returns the BasicBlock* mapped to the pre-header of the loop region
360 /// containing \p R.
363
364 /// Hold a pointer to LoopInfo to register new basic blocks in the loop.
366
367 /// Hold a reference to the IRBuilder used to generate output IR code.
369
370 /// Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
372
373 /// Pointer to the VPlan code is generated for.
375
376 /// The loop object for the current parent region, or nullptr.
378
379 /// LoopVersioning. It's only set up (non-null) if memchecks were
380 /// used.
381 ///
382 /// This is currently only used to add no-alias metadata based on the
383 /// memchecks. The actual versioning is performed manually.
385
386 /// Map SCEVs to their expanded values. Populated when executing
387 /// VPExpandSCEVRecipes.
389
390 /// VPlan-based type analysis.
392};
393
394/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
395/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
397 friend class VPBlockUtils;
398
399 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
400
401 /// An optional name for the block.
402 std::string Name;
403
404 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
405 /// it is a topmost VPBlockBase.
406 VPRegionBlock *Parent = nullptr;
407
408 /// List of predecessor blocks.
410
411 /// List of successor blocks.
413
414 /// VPlan containing the block. Can only be set on the entry block of the
415 /// plan.
416 VPlan *Plan = nullptr;
417
418 /// Add \p Successor as the last successor to this block.
419 void appendSuccessor(VPBlockBase *Successor) {
420 assert(Successor && "Cannot add nullptr successor!");
421 Successors.push_back(Successor);
422 }
423
424 /// Add \p Predecessor as the last predecessor to this block.
425 void appendPredecessor(VPBlockBase *Predecessor) {
426 assert(Predecessor && "Cannot add nullptr predecessor!");
427 Predecessors.push_back(Predecessor);
428 }
429
430 /// Remove \p Predecessor from the predecessors of this block.
431 void removePredecessor(VPBlockBase *Predecessor) {
432 auto Pos = find(Predecessors, Predecessor);
433 assert(Pos && "Predecessor does not exist");
434 Predecessors.erase(Pos);
435 }
436
437 /// Remove \p Successor from the successors of this block.
438 void removeSuccessor(VPBlockBase *Successor) {
439 auto Pos = find(Successors, Successor);
440 assert(Pos && "Successor does not exist");
441 Successors.erase(Pos);
442 }
443
444 /// This function replaces one predecessor with another, useful when
445 /// trying to replace an old block in the CFG with a new one.
446 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
447 auto I = find(Predecessors, Old);
448 assert(I != Predecessors.end());
449 assert(Old->getParent() == New->getParent() &&
450 "replaced predecessor must have the same parent");
451 *I = New;
452 }
453
454 /// This function replaces one successor with another, useful when
455 /// trying to replace an old block in the CFG with a new one.
456 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
457 auto I = find(Successors, Old);
458 assert(I != Successors.end());
459 assert(Old->getParent() == New->getParent() &&
460 "replaced successor must have the same parent");
461 *I = New;
462 }
463
464protected:
465 VPBlockBase(const unsigned char SC, const std::string &N)
466 : SubclassID(SC), Name(N) {}
467
468public:
469 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
470 /// that are actually instantiated. Values of this enumeration are kept in the
471 /// SubclassID field of the VPBlockBase objects. They are used for concrete
472 /// type identification.
473 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
474
476
477 virtual ~VPBlockBase() = default;
478
479 const std::string &getName() const { return Name; }
480
481 void setName(const Twine &newName) { Name = newName.str(); }
482
483 /// \return an ID for the concrete type of this object.
484 /// This is used to implement the classof checks. This should not be used
485 /// for any other purpose, as the values may change as LLVM evolves.
486 unsigned getVPBlockID() const { return SubclassID; }
487
488 VPRegionBlock *getParent() { return Parent; }
489 const VPRegionBlock *getParent() const { return Parent; }
490
491 /// \return A pointer to the plan containing the current block.
492 VPlan *getPlan();
493 const VPlan *getPlan() const;
494
495 /// Sets the pointer of the plan containing the block. The block must be the
496 /// entry block into the VPlan.
497 void setPlan(VPlan *ParentPlan);
498
499 void setParent(VPRegionBlock *P) { Parent = P; }
500
501 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
502 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
503 /// VPBlockBase is a VPBasicBlock, it is returned.
504 const VPBasicBlock *getEntryBasicBlock() const;
506
507 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
508 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
509 /// VPBlockBase is a VPBasicBlock, it is returned.
510 const VPBasicBlock *getExitingBasicBlock() const;
512
513 const VPBlocksTy &getSuccessors() const { return Successors; }
514 VPBlocksTy &getSuccessors() { return Successors; }
515
518
519 const VPBlocksTy &getPredecessors() const { return Predecessors; }
520 VPBlocksTy &getPredecessors() { return Predecessors; }
521
522 /// \return the successor of this VPBlockBase if it has a single successor.
523 /// Otherwise return a null pointer.
525 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
526 }
527
528 /// \return the predecessor of this VPBlockBase if it has a single
529 /// predecessor. Otherwise return a null pointer.
531 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
532 }
533
534 size_t getNumSuccessors() const { return Successors.size(); }
535 size_t getNumPredecessors() const { return Predecessors.size(); }
536
537 /// An Enclosing Block of a block B is any block containing B, including B
538 /// itself. \return the closest enclosing block starting from "this", which
539 /// has successors. \return the root enclosing block if all enclosing blocks
540 /// have no successors.
542
543 /// \return the closest enclosing block starting from "this", which has
544 /// predecessors. \return the root enclosing block if all enclosing blocks
545 /// have no predecessors.
547
548 /// \return the successors either attached directly to this VPBlockBase or, if
549 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
550 /// successors of its own, search recursively for the first enclosing
551 /// VPRegionBlock that has successors and return them. If no such
552 /// VPRegionBlock exists, return the (empty) successors of the topmost
553 /// VPBlockBase reached.
556 }
557
558 /// \return the hierarchical successor of this VPBlockBase if it has a single
559 /// hierarchical successor. Otherwise return a null pointer.
562 }
563
564 /// \return the predecessors either attached directly to this VPBlockBase or,
565 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
566 /// predecessors of its own, search recursively for the first enclosing
567 /// VPRegionBlock that has predecessors and return them. If no such
568 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
569 /// VPBlockBase reached.
572 }
573
574 /// \return the hierarchical predecessor of this VPBlockBase if it has a
575 /// single hierarchical predecessor. Otherwise return a null pointer.
578 }
579
580 /// Set a given VPBlockBase \p Successor as the single successor of this
581 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
582 /// This VPBlockBase must have no successors.
584 assert(Successors.empty() && "Setting one successor when others exist.");
585 assert(Successor->getParent() == getParent() &&
586 "connected blocks must have the same parent");
587 appendSuccessor(Successor);
588 }
589
590 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
591 /// successors of this VPBlockBase. This VPBlockBase is not added as
592 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
593 /// successors.
594 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
595 assert(Successors.empty() && "Setting two successors when others exist.");
596 appendSuccessor(IfTrue);
597 appendSuccessor(IfFalse);
598 }
599
600 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
601 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
602 /// as successor of any VPBasicBlock in \p NewPreds.
604 assert(Predecessors.empty() && "Block predecessors already set.");
605 for (auto *Pred : NewPreds)
606 appendPredecessor(Pred);
607 }
608
609 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
610 /// This VPBlockBase must have no successors. This VPBlockBase is not added
611 /// as predecessor of any VPBasicBlock in \p NewSuccs.
613 assert(Successors.empty() && "Block successors already set.");
614 for (auto *Succ : NewSuccs)
615 appendSuccessor(Succ);
616 }
617
618 /// Remove all the predecessors of this block.
619 void clearPredecessors() { Predecessors.clear(); }
620
621 /// Remove all the successors of this block.
622 void clearSuccessors() { Successors.clear(); }
623
624 /// Swap successors of the block. The block must have exactly 2 successors.
625 // TODO: This should be part of introducing conditional branch recipes rather
626 // than being independent.
628 assert(Successors.size() == 2 && "must have 2 successors to swap");
629 std::swap(Successors[0], Successors[1]);
630 }
631
632 /// The method which generates the output IR that correspond to this
633 /// VPBlockBase, thereby "executing" the VPlan.
634 virtual void execute(VPTransformState *State) = 0;
635
636 /// Return the cost of the block.
638
639 /// Delete all blocks reachable from a given VPBlockBase, inclusive.
640 static void deleteCFG(VPBlockBase *Entry);
641
642 /// Return true if it is legal to hoist instructions into this block.
644 // There are currently no constraints that prevent an instruction to be
645 // hoisted into a VPBlockBase.
646 return true;
647 }
648
649 /// Replace all operands of VPUsers in the block with \p NewValue and also
650 /// replaces all uses of VPValues defined in the block with NewValue.
651 virtual void dropAllReferences(VPValue *NewValue) = 0;
652
653#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
654 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
655 OS << getName();
656 }
657
658 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
659 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
660 /// consecutive numbers.
661 ///
662 /// Note that the numbering is applied to the whole VPlan, so printing
663 /// individual blocks is consistent with the whole VPlan printing.
664 virtual void print(raw_ostream &O, const Twine &Indent,
665 VPSlotTracker &SlotTracker) const = 0;
666
667 /// Print plain-text dump of this VPBlockBase to \p O.
668 void print(raw_ostream &O) const {
670 print(O, "", SlotTracker);
671 }
672
673 /// Print the successors of this block to \p O, prefixing all lines with \p
674 /// Indent.
675 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
676
677 /// Dump this VPBlockBase to dbgs().
678 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
679#endif
680
681 /// Clone the current block and its recipes without updating the operands of
682 /// the cloned recipes, including all blocks in the single-entry single-exit
683 /// region for VPRegionBlocks.
684 virtual VPBlockBase *clone() = 0;
685};
686
687/// Struct to hold various analysis needed for cost computations.
695
698 : TTI(TTI), TLI(TLI), Types(CanIVTy), LLVMCtx(CanIVTy->getContext()),
699 CM(CM) {}
700
701 /// Return the cost for \p UI with \p VF using the legacy cost model as
702 /// fallback until computing the cost of all recipes migrates to VPlan.
704
705 /// Return true if the cost for \p UI shouldn't be computed, e.g. because it
706 /// has already been pre-computed.
707 bool skipCostComputation(Instruction *UI, bool IsVector) const;
708
709 /// Returns the OperandInfo for \p V, if it is a live-in.
711};
712
713/// VPRecipeBase is a base class modeling a sequence of one or more output IR
714/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
715/// and is responsible for deleting its defined values. Single-value
716/// recipes must inherit from VPSingleDef instead of inheriting from both
717/// VPRecipeBase and VPValue separately.
718class VPRecipeBase : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
719 public VPDef,
720 public VPUser {
721 friend VPBasicBlock;
722 friend class VPBlockUtils;
723
724 /// Each VPRecipe belongs to a single VPBasicBlock.
725 VPBasicBlock *Parent = nullptr;
726
727 /// The debug location for the recipe.
728 DebugLoc DL;
729
730public:
732 DebugLoc DL = {})
733 : VPDef(SC), VPUser(Operands), DL(DL) {}
734
735 template <typename IterT>
737 DebugLoc DL = {})
738 : VPDef(SC), VPUser(Operands), DL(DL) {}
739 virtual ~VPRecipeBase() = default;
740
741 /// Clone the current recipe.
742 virtual VPRecipeBase *clone() = 0;
743
744 /// \return the VPBasicBlock which this VPRecipe belongs to.
745 VPBasicBlock *getParent() { return Parent; }
746 const VPBasicBlock *getParent() const { return Parent; }
747
748 /// The method which generates the output IR instructions that correspond to
749 /// this VPRecipe, thereby "executing" the VPlan.
750 virtual void execute(VPTransformState &State) = 0;
751
752 /// Return the cost of this recipe, taking into account if the cost
753 /// computation should be skipped and the ForceTargetInstructionCost flag.
754 /// Also takes care of printing the cost for debugging.
756
757 /// Insert an unlinked recipe into a basic block immediately before
758 /// the specified recipe.
759 void insertBefore(VPRecipeBase *InsertPos);
760 /// Insert an unlinked recipe into \p BB immediately before the insertion
761 /// point \p IP;
763
764 /// Insert an unlinked Recipe into a basic block immediately after
765 /// the specified Recipe.
766 void insertAfter(VPRecipeBase *InsertPos);
767
768 /// Unlink this recipe from its current VPBasicBlock and insert it into
769 /// the VPBasicBlock that MovePos lives in, right after MovePos.
770 void moveAfter(VPRecipeBase *MovePos);
771
772 /// Unlink this recipe and insert into BB before I.
773 ///
774 /// \pre I is a valid iterator into BB.
776
777 /// This method unlinks 'this' from the containing basic block, but does not
778 /// delete it.
779 void removeFromParent();
780
781 /// This method unlinks 'this' from the containing basic block and deletes it.
782 ///
783 /// \returns an iterator pointing to the element after the erased one
785
786 /// Method to support type inquiry through isa, cast, and dyn_cast.
787 static inline bool classof(const VPDef *D) {
788 // All VPDefs are also VPRecipeBases.
789 return true;
790 }
791
792 static inline bool classof(const VPUser *U) { return true; }
793
794 /// Returns true if the recipe may have side-effects.
795 bool mayHaveSideEffects() const;
796
797 /// Returns true for PHI-like recipes.
798 bool isPhi() const {
799 return getVPDefID() >= VPFirstPHISC && getVPDefID() <= VPLastPHISC;
800 }
801
802 /// Returns true if the recipe may read from memory.
803 bool mayReadFromMemory() const;
804
805 /// Returns true if the recipe may write to memory.
806 bool mayWriteToMemory() const;
807
808 /// Returns true if the recipe may read from or write to memory.
809 bool mayReadOrWriteMemory() const {
811 }
812
813 /// Returns the debug location of the recipe.
814 DebugLoc getDebugLoc() const { return DL; }
815
816protected:
817 /// Compute the cost of this recipe either using a recipe's specialized
818 /// implementation or using the legacy cost model and the underlying
819 /// instructions.
821 VPCostContext &Ctx) const;
822};
823
824// Helper macro to define common classof implementations for recipes.
825#define VP_CLASSOF_IMPL(VPDefID) \
826 static inline bool classof(const VPDef *D) { \
827 return D->getVPDefID() == VPDefID; \
828 } \
829 static inline bool classof(const VPValue *V) { \
830 auto *R = V->getDefiningRecipe(); \
831 return R && R->getVPDefID() == VPDefID; \
832 } \
833 static inline bool classof(const VPUser *U) { \
834 auto *R = dyn_cast<VPRecipeBase>(U); \
835 return R && R->getVPDefID() == VPDefID; \
836 } \
837 static inline bool classof(const VPRecipeBase *R) { \
838 return R->getVPDefID() == VPDefID; \
839 } \
840 static inline bool classof(const VPSingleDefRecipe *R) { \
841 return R->getVPDefID() == VPDefID; \
842 }
843
844/// VPSingleDefRecipe is a base class for recipes modeling a sequence of one or
845/// more output IR instructions that define a single result VPValue.
846/// Note that VPRecipeBase must be inherited from before VPValue.
847class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
848public:
849 template <typename IterT>
850 VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL = {})
851 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
852
853 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
854 DebugLoc DL = {})
855 : VPRecipeBase(SC, Operands, DL), VPValue(this) {}
856
857 template <typename IterT>
858 VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV,
859 DebugLoc DL = {})
860 : VPRecipeBase(SC, Operands, DL), VPValue(this, UV) {}
861
862 static inline bool classof(const VPRecipeBase *R) {
863 switch (R->getVPDefID()) {
864 case VPRecipeBase::VPDerivedIVSC:
865 case VPRecipeBase::VPEVLBasedIVPHISC:
866 case VPRecipeBase::VPExpandSCEVSC:
867 case VPRecipeBase::VPInstructionSC:
868 case VPRecipeBase::VPReductionEVLSC:
869 case VPRecipeBase::VPReductionSC:
870 case VPRecipeBase::VPReplicateSC:
871 case VPRecipeBase::VPScalarIVStepsSC:
872 case VPRecipeBase::VPVectorPointerSC:
873 case VPRecipeBase::VPReverseVectorPointerSC:
874 case VPRecipeBase::VPWidenCallSC:
875 case VPRecipeBase::VPWidenCanonicalIVSC:
876 case VPRecipeBase::VPWidenCastSC:
877 case VPRecipeBase::VPWidenGEPSC:
878 case VPRecipeBase::VPWidenIntrinsicSC:
879 case VPRecipeBase::VPWidenSC:
880 case VPRecipeBase::VPWidenEVLSC:
881 case VPRecipeBase::VPWidenSelectSC:
882 case VPRecipeBase::VPBlendSC:
883 case VPRecipeBase::VPPredInstPHISC:
884 case VPRecipeBase::VPCanonicalIVPHISC:
885 case VPRecipeBase::VPActiveLaneMaskPHISC:
886 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
887 case VPRecipeBase::VPWidenPHISC:
888 case VPRecipeBase::VPWidenIntOrFpInductionSC:
889 case VPRecipeBase::VPWidenPointerInductionSC:
890 case VPRecipeBase::VPReductionPHISC:
891 case VPRecipeBase::VPScalarCastSC:
892 return true;
893 case VPRecipeBase::VPBranchOnMaskSC:
894 case VPRecipeBase::VPInterleaveSC:
895 case VPRecipeBase::VPIRInstructionSC:
896 case VPRecipeBase::VPWidenLoadEVLSC:
897 case VPRecipeBase::VPWidenLoadSC:
898 case VPRecipeBase::VPWidenStoreEVLSC:
899 case VPRecipeBase::VPWidenStoreSC:
900 case VPRecipeBase::VPHistogramSC:
901 // TODO: Widened stores don't define a value, but widened loads do. Split
902 // the recipes to be able to make widened loads VPSingleDefRecipes.
903 return false;
904 }
905 llvm_unreachable("Unhandled VPDefID");
906 }
907
908 static inline bool classof(const VPUser *U) {
909 auto *R = dyn_cast<VPRecipeBase>(U);
910 return R && classof(R);
911 }
912
913 virtual VPSingleDefRecipe *clone() override = 0;
914
915 /// Returns the underlying instruction.
917 return cast<Instruction>(getUnderlyingValue());
918 }
920 return cast<Instruction>(getUnderlyingValue());
921 }
922
923#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
924 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
925 LLVM_DUMP_METHOD void dump() const;
926#endif
927};
928
929/// Class to record the LLVM IR flags for a recipe along with the recipe itself.
931 enum class OperationType : unsigned char {
932 Cmp,
933 OverflowingBinOp,
934 DisjointOp,
935 PossiblyExactOp,
936 GEPOp,
937 FPMathOp,
938 NonNegOp,
939 Other
940 };
941
942public:
943 struct WrapFlagsTy {
944 char HasNUW : 1;
945 char HasNSW : 1;
946
948 };
949
951 char IsDisjoint : 1;
953 };
954
955private:
956 struct ExactFlagsTy {
957 char IsExact : 1;
958 };
959 struct NonNegFlagsTy {
960 char NonNeg : 1;
961 };
962 struct FastMathFlagsTy {
963 char AllowReassoc : 1;
964 char NoNaNs : 1;
965 char NoInfs : 1;
966 char NoSignedZeros : 1;
967 char AllowReciprocal : 1;
968 char AllowContract : 1;
969 char ApproxFunc : 1;
970
971 FastMathFlagsTy(const FastMathFlags &FMF);
972 };
973
974 OperationType OpType;
975
976 union {
980 ExactFlagsTy ExactFlags;
982 NonNegFlagsTy NonNegFlags;
983 FastMathFlagsTy FMFs;
984 unsigned AllFlags;
985 };
986
987protected:
989 OpType = Other.OpType;
990 AllFlags = Other.AllFlags;
991 }
992
993public:
/// Construct a recipe that carries no IR flags: the operation type is Other
/// and every flag bit is cleared. \p SC, \p Operands and \p DL are forwarded
/// unchanged to the VPSingleDefRecipe base.
994 template <typename IterT>
995 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL = {})
996 : VPSingleDefRecipe(SC, Operands, DL) {
997 OpType = OperationType::Other;
// Zero the whole flag storage at once rather than one flag group.
998 AllFlags = 0;
999 }
1000
1001 template <typename IterT>
1002 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
1004 if (auto *Op = dyn_cast<CmpInst>(&I)) {
1005 OpType = OperationType::Cmp;
1006 CmpPredicate = Op->getPredicate();
1007 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
1008 OpType = OperationType::DisjointOp;
1009 DisjointFlags.IsDisjoint = Op->isDisjoint();
1010 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
1011 OpType = OperationType::OverflowingBinOp;
1012 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
1013 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
1014 OpType = OperationType::PossiblyExactOp;
1015 ExactFlags.IsExact = Op->isExact();
1016 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
1017 OpType = OperationType::GEPOp;
1018 GEPFlags = GEP->getNoWrapFlags();
1019 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
1020 OpType = OperationType::NonNegOp;
1021 NonNegFlags.NonNeg = PNNI->hasNonNeg();
1022 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
1023 OpType = OperationType::FPMathOp;
1024 FMFs = Op->getFastMathFlags();
1025 } else {
1026 OpType = OperationType::Other;
1027 AllFlags = 0;
1028 }
1029 }
1030
/// Construct a compare recipe: the operation type is Cmp and \p Pred is stored
/// as the compare predicate. \p SC, \p Operands and \p DL are forwarded to the
/// VPSingleDefRecipe base.
1031 template <typename IterT>
1032 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1033 CmpInst::Predicate Pred, DebugLoc DL = {})
1034 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::Cmp),
1035 CmpPredicate(Pred) {}
1036
1037 template <typename IterT>
1038 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1040 : VPSingleDefRecipe(SC, Operands, DL),
1041 OpType(OperationType::OverflowingBinOp), WrapFlags(WrapFlags) {}
1042
/// Construct a floating-point math recipe: the operation type is FPMathOp and
/// the recipe's fast-math flags are initialised from \p FMFs. \p SC,
/// \p Operands and \p DL are forwarded to the VPSingleDefRecipe base.
1043 template <typename IterT>
1044 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1045 FastMathFlags FMFs, DebugLoc DL = {})
1046 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::FPMathOp),
1047 FMFs(FMFs) {}
1048
1049 template <typename IterT>
1050 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1052 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::DisjointOp),
1054
1055protected:
1056 template <typename IterT>
1057 VPRecipeWithIRFlags(const unsigned char SC, IterT Operands,
1059 : VPSingleDefRecipe(SC, Operands, DL), OpType(OperationType::GEPOp),
1060 GEPFlags(GEPFlags) {}
1061
1062public:
/// Type inquiry for isa/cast/dyn_cast: returns true when \p R has one of the
/// recipe IDs listed below. The call- and intrinsic-widening recipes also
/// initialise VPRecipeWithIRFlags as their base, so their IDs must be listed
/// here as well; otherwise isa<VPRecipeWithIRFlags> rejects them and their
/// flags are skipped by code that dispatches on this class.
1063 static inline bool classof(const VPRecipeBase *R) {
1064 return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
1065 R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1066 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
1067 R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCallSC ||
1068 R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPWidenIntrinsicSC ||
1069 R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
1070 R->getVPDefID() == VPRecipeBase::VPReverseVectorPointerSC ||
1071 R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
1072 }
1073
/// Type inquiry starting from a VPUser: succeeds only when \p U is a
/// VPRecipeBase for which the VPRecipeBase overload of classof() returns true.
1074 static inline bool classof(const VPUser *U) {
1075 auto *R = dyn_cast<VPRecipeBase>(U);
// A VPUser that is not a recipe yields a null R and is rejected.
1076 return R && classof(R);
1077 }
1078
1079 /// Drop all poison-generating flags.
1081 // NOTE: This needs to be kept in-sync with
1082 // Instruction::dropPoisonGeneratingFlags.
1083 switch (OpType) {
1084 case OperationType::OverflowingBinOp:
1085 WrapFlags.HasNUW = false;
1086 WrapFlags.HasNSW = false;
1087 break;
1088 case OperationType::DisjointOp:
1089 DisjointFlags.IsDisjoint = false;
1090 break;
1091 case OperationType::PossiblyExactOp:
1092 ExactFlags.IsExact = false;
1093 break;
1094 case OperationType::GEPOp:
1096 break;
1097 case OperationType::FPMathOp:
1098 FMFs.NoNaNs = false;
1099 FMFs.NoInfs = false;
1100 break;
1101 case OperationType::NonNegOp:
1102 NonNegFlags.NonNeg = false;
1103 break;
1104 case OperationType::Cmp:
1105 case OperationType::Other:
1106 break;
1107 }
1108 }
1109
1110 /// Set the IR flags for \p I.
/// Only the flag group selected by this recipe's operation type is written to
/// \p I. For the Cmp and Other operation types nothing is written.
1111 void setFlags(Instruction *I) const {
1112 switch (OpType) {
1113 case OperationType::OverflowingBinOp:
1114 I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
1115 I->setHasNoSignedWrap(WrapFlags.HasNSW);
1116 break;
1117 case OperationType::DisjointOp:
// The disjoint setter is reached through PossiblyDisjointInst, hence the cast.
1118 cast<PossiblyDisjointInst>(I)->setIsDisjoint(DisjointFlags.IsDisjoint);
1119 break;
1120 case OperationType::PossiblyExactOp:
1121 I->setIsExact(ExactFlags.IsExact);
1122 break;
1123 case OperationType::GEPOp:
// The no-wrap setter is reached through GetElementPtrInst, hence the cast.
1124 cast<GetElementPtrInst>(I)->setNoWrapFlags(GEPFlags);
1125 break;
1126 case OperationType::FPMathOp:
// Each fast-math flag is copied individually from the stored bit-field.
1127 I->setHasAllowReassoc(FMFs.AllowReassoc);
1128 I->setHasNoNaNs(FMFs.NoNaNs);
1129 I->setHasNoInfs(FMFs.NoInfs);
1130 I->setHasNoSignedZeros(FMFs.NoSignedZeros);
1131 I->setHasAllowReciprocal(FMFs.AllowReciprocal);
1132 I->setHasAllowContract(FMFs.AllowContract);
1133 I->setHasApproxFunc(FMFs.ApproxFunc);
1134 break;
1135 case OperationType::NonNegOp:
1136 I->setNonNeg(NonNegFlags.NonNeg);
1137 break;
// Nothing is transferred to the instruction for these two operation types.
1138 case OperationType::Cmp:
1139 case OperationType::Other:
1140 break;
1141 }
1142 }
1143
1145 assert(OpType == OperationType::Cmp &&
1146 "recipe doesn't have a compare predicate");
1147 return CmpPredicate;
1148 }
1149
1151
1152 /// Returns true if the recipe has fast-math flags.
1153 bool hasFastMathFlags() const { return OpType == OperationType::FPMathOp; }
1154
1156
/// Returns the recorded no-unsigned-wrap (NUW) flag. Asserts that the recipe's
/// operation type is OverflowingBinOp.
1157 bool hasNoUnsignedWrap() const {
1158 assert(OpType == OperationType::OverflowingBinOp &&
1159 "recipe doesn't have a NUW flag");
1160 return WrapFlags.HasNUW;
1161 }
1162
/// Returns the recorded no-signed-wrap (NSW) flag. Asserts that the recipe's
/// operation type is OverflowingBinOp.
1163 bool hasNoSignedWrap() const {
1164 assert(OpType == OperationType::OverflowingBinOp &&
1165 "recipe doesn't have a NSW flag");
1166 return WrapFlags.HasNSW;
1167 }
1168
1169 bool isDisjoint() const {
1170 assert(OpType == OperationType::DisjointOp &&
1171 "recipe cannot have a disjoing flag");
1173 }
1174
1175#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1176 void printFlags(raw_ostream &O) const;
1177#endif
1178};
1179
1180/// Helper to access the operand that contains the unroll part for this recipe
1181/// after unrolling.
1182template <unsigned PartOpIdx> class VPUnrollPartAccessor {
1183protected:
1184 /// Return the VPValue operand containing the unroll part or null if there is
1185 /// no such operand.
1187
1188 /// Return the unroll part.
1189 unsigned getUnrollPart(VPUser &U) const;
1190};
1191
1192/// This is a concrete Recipe that models a single VPlan-level instruction.
1193/// While as any Recipe it may generate a sequence of IR instructions when
1194/// executed, these instructions would always form a single-def expression as
1195/// the VPInstruction is also a single def-use vertex.
1197 public VPUnrollPartAccessor<1> {
1198 friend class VPlanSlp;
1199
1200public:
1201 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1202 enum {
1204 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1205 // values of a first-order recurrence.
1211 /// Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
1212 /// The first operand is the incoming value from the predecessor in VPlan,
1213 /// the second operand is the incoming value for all other predecessors
1214 /// (which are currently not modeled in VPlan).
1217 // Increment the canonical IV separately for each unrolled part.
1222 // Takes the VPValue to extract from as first operand and the lane or part
1223 // to extract as second operand, counting from the end starting with 1 for
1224 // last. The second operand must be a positive constant and <= VF.
1226 LogicalAnd, // Non-poison propagating logical And.
1227 // Add an offset in bytes (second operand) to a base pointer (first
1228 // operand). Only generates scalar values (either for the first lane only or
1229 // for all lanes, depending on its uses).
1231 // Returns a scalar boolean value, which is true if any lane of its single
1232 // operand is true.
1234 };
1235
1236private:
1237 typedef unsigned char OpcodeTy;
1238 OpcodeTy Opcode;
1239
1240 /// An optional name that can be used for the generated IR instruction.
1241 const std::string Name;
1242
1243 /// Returns true if this VPInstruction generates scalar values for all lanes.
1244 /// Most VPInstructions generate a single value per part, either vector or
1245 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1246 /// values per all lanes, stemming from an original ingredient. This method
1247 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1248 /// underlying ingredient.
1249 bool doesGeneratePerAllLanes() const;
1250
1251 /// Returns true if we can generate a scalar for the first lane only if
1252 /// needed.
1253 bool canGenerateScalarForFirstLane() const;
1254
1255 /// Utility methods serving execute(): generates a single vector instance of
1256 /// the modeled instruction. \returns the generated value. In some cases an
1257 /// existing value is returned rather than a generated one.
1258 Value *generate(VPTransformState &State);
1259
1260 /// Utility methods serving execute(): generates a scalar single instance of
1261 /// the modeled instruction for a given lane. \returns the scalar generated
1262 /// value for lane \p Lane.
1263 Value *generatePerLane(VPTransformState &State, const VPLane &Lane);
1264
1265#if !defined(NDEBUG)
1266 /// Return true if the VPInstruction is a floating point math operation, i.e.
1267 /// has fast-math flags.
1268 bool isFPMathOp() const;
1269#endif
1270
1271public:
1273 const Twine &Name = "")
1274 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DL),
1275 Opcode(Opcode), Name(Name.str()) {}
1276
1277 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1278 DebugLoc DL = {}, const Twine &Name = "")
1280
1281 VPInstruction(unsigned Opcode, CmpInst::Predicate Pred, VPValue *A,
1282 VPValue *B, DebugLoc DL = {}, const Twine &Name = "");
1283
/// Create a VPInstruction with opcode \p Opcode over \p Operands that carries
/// the wrap flags \p WrapFlags. \p DL is forwarded to the base class and
/// \p Name is stored as a string copy.
1284 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1285 WrapFlagsTy WrapFlags, DebugLoc DL = {}, const Twine &Name = "")
1286 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, WrapFlags, DL),
1287 Opcode(Opcode), Name(Name.str()) {}
1288
/// Create a VPInstruction with opcode \p Opcode over \p Operands that carries
/// the disjoint flag \p DisjointFlag. \p DL is forwarded to the base class and
/// \p Name is stored as a string copy. Asserts that \p Opcode is
/// Instruction::Or.
1289 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1290 DisjointFlagsTy DisjointFlag, DebugLoc DL = {},
1291 const Twine &Name = "")
1292 : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, DisjointFlag, DL),
1293 Opcode(Opcode), Name(Name.str()) {
1294 assert(Opcode == Instruction::Or && "only OR opcodes can be disjoint");
1295 }
1296
1298 DebugLoc DL = {}, const Twine &Name = "")
1299 : VPRecipeWithIRFlags(VPDef::VPInstructionSC,
1300 ArrayRef<VPValue *>({Ptr, Offset}), Flags, DL),
1301 Opcode(VPInstruction::PtrAdd), Name(Name.str()) {}
1302
1303 VPInstruction(unsigned Opcode, std::initializer_list<VPValue *> Operands,
1304 FastMathFlags FMFs, DebugLoc DL = {}, const Twine &Name = "");
1305
1306 VP_CLASSOF_IMPL(VPDef::VPInstructionSC)
1307
1308 VPInstruction *clone() override {
1310 auto *New = new VPInstruction(Opcode, Operands, getDebugLoc(), Name);
1311 New->transferFlags(*this);
1312 return New;
1313 }
1314
1315 unsigned getOpcode() const { return Opcode; }
1316
1317 /// Generate the instruction.
1318 /// TODO: We currently execute only per-part unless a specific instance is
1319 /// provided.
1320 void execute(VPTransformState &State) override;
1321
1322 /// Return the cost of this VPInstruction.
1324 VPCostContext &Ctx) const override {
1325 // TODO: Compute accurate cost after retiring the legacy cost model.
1326 return 0;
1327 }
1328
1329#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1330 /// Print the VPInstruction to \p O.
1331 void print(raw_ostream &O, const Twine &Indent,
1332 VPSlotTracker &SlotTracker) const override;
1333
1334 /// Print the VPInstruction to dbgs() (for debugging).
1335 LLVM_DUMP_METHOD void dump() const;
1336#endif
1337
1338 bool hasResult() const {
1339 // CallInst may or may not have a result, depending on the called function.
1340 // Conservatively assume that calls have results for now.
1341 switch (getOpcode()) {
1342 case Instruction::Ret:
1343 case Instruction::Br:
1344 case Instruction::Store:
1345 case Instruction::Switch:
1346 case Instruction::IndirectBr:
1347 case Instruction::Resume:
1348 case Instruction::CatchRet:
1349 case Instruction::Unreachable:
1350 case Instruction::Fence:
1351 case Instruction::AtomicRMW:
1354 return false;
1355 default:
1356 return true;
1357 }
1358 }
1359
1360 /// Returns true if the recipe only uses the first lane of operand \p Op.
1361 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1362
1363 /// Returns true if the recipe only uses the first part of operand \p Op.
1364 bool onlyFirstPartUsed(const VPValue *Op) const override;
1365
1366 /// Returns true if this VPInstruction produces a scalar value from a vector,
1367 /// e.g. by performing a reduction or extracting a lane.
1368 bool isVectorToScalar() const;
1369
1370 /// Returns true if this VPInstruction's operands are single scalars and the
1371 /// result is also a single scalar.
1372 bool isSingleScalar() const;
1373
1374 /// Returns the symbolic name assigned to the VPInstruction.
1375 StringRef getName() const { return Name; }
1376};
1377
1378 /// A recipe to wrap an original IR instruction not to be modified during
1379 /// execution, except for PHIs. For PHIs, a single VPValue operand is allowed,
1380 /// and it is used to add a new incoming value for the single predecessor VPBB.
1381 /// Except for PHIs, VPIRInstructions cannot have any operands.
1383 Instruction &I;
1384
1385public:
1387 : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef<VPValue *>()), I(I) {}
1388
1389 ~VPIRInstruction() override = default;
1390
1391 VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC)
1392
1394 auto *R = new VPIRInstruction(I);
1395 for (auto *Op : operands())
1396 R->addOperand(Op);
1397 return R;
1398 }
1399
1400 void execute(VPTransformState &State) override;
1401
1402 /// Return the cost of this VPIRInstruction.
1404 VPCostContext &Ctx) const override;
1405
1406 Instruction &getInstruction() const { return I; }
1407
1408#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1409 /// Print the recipe.
1410 void print(raw_ostream &O, const Twine &Indent,
1411 VPSlotTracker &SlotTracker) const override;
1412#endif
1413
1414 bool usesScalars(const VPValue *Op) const override {
1416 "Op must be an operand of the recipe");
1417 return true;
1418 }
1419
1420 bool onlyFirstPartUsed(const VPValue *Op) const override {
1422 "Op must be an operand of the recipe");
1423 return true;
1424 }
1425};
1426
1427/// VPWidenRecipe is a recipe for producing a widened instruction using the
1428/// opcode and operands of the recipe. This recipe covers most of the
1429/// traditional vectorization cases where each recipe transforms into a
1430/// vectorized version of itself.
1432 unsigned Opcode;
1433
1434protected:
1435 template <typename IterT>
1436 VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1438 : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1439
1440public:
1441 template <typename IterT>
1443 : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1444
1445 ~VPWidenRecipe() override = default;
1446
/// Create a new VPWidenRecipe from this recipe's underlying instruction and
/// current operands, then copy this recipe's flags onto it.
1447 VPWidenRecipe *clone() override {
1448 auto *R = new VPWidenRecipe(*getUnderlyingInstr(), operands());
// The constructor derives flags from the underlying instruction; overwrite
// them with this recipe's current flags.
1449 R->transferFlags(*this);
1450 return R;
1451 }
1452
/// Type inquiry for isa/cast/dyn_cast: accepts recipes whose ID is either
/// VPWidenSC or VPWidenEVLSC.
1453 static inline bool classof(const VPRecipeBase *R) {
1454 return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1455 R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1456 }
1457
/// Type inquiry starting from a VPUser: succeeds only when \p U is a
/// VPRecipeBase for which the VPRecipeBase overload of classof() returns true.
1458 static inline bool classof(const VPUser *U) {
1459 auto *R = dyn_cast<VPRecipeBase>(U);
// A VPUser that is not a recipe yields a null R and is rejected.
1460 return R && classof(R);
1461 }
1462
1463 /// Produce a widened instruction using the opcode and operands of the recipe,
1464 /// processing State.VF elements.
1465 void execute(VPTransformState &State) override;
1466
1467 /// Return the cost of this VPWidenRecipe.
1469 VPCostContext &Ctx) const override;
1470
1471 unsigned getOpcode() const { return Opcode; }
1472
1473#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1474 /// Print the recipe.
1475 void print(raw_ostream &O, const Twine &Indent,
1476 VPSlotTracker &SlotTracker) const override;
1477#endif
1478};
1479
1480/// A recipe for widening operations with vector-predication intrinsics with
1481/// explicit vector length (EVL).
1484
1485public:
1486 template <typename IterT>
1488 : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1489 addOperand(&EVL);
1490 }
1492 : VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1493 transferFlags(W);
1494 }
1495
1496 ~VPWidenEVLRecipe() override = default;
1497
/// Cloning is not supported for this recipe: calling this hits
/// llvm_unreachable.
1498 VPWidenRecipe *clone() override final {
1499 llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
// Never reached; keeps a return statement on every path.
1500 return nullptr;
1501 }
1502
1503 VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1504
1506 const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1507
1508 /// Produce a vp-intrinsic using the opcode and operands of the recipe,
1509 /// processing EVL elements.
1510 void execute(VPTransformState &State) override final;
1511
1512 /// Returns true if the recipe only uses the first lane of operand \p Op.
1513 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1515 "Op must be an operand of the recipe");
1516 // EVL in that recipe is always the last operand, thus any use before means
1517 // the VPValue should be vectorized.
1518 return getEVL() == Op;
1519 }
1520
1521#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522 /// Print the recipe.
1523 void print(raw_ostream &O, const Twine &Indent,
1524 VPSlotTracker &SlotTracker) const override final;
1525#endif
1526};
1527
1528/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1530 /// Cast instruction opcode.
1531 Instruction::CastOps Opcode;
1532
1533 /// Result type for the cast.
1534 Type *ResultTy;
1535
1536public:
1538 CastInst &UI)
1539 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
1540 ResultTy(ResultTy) {
1541 assert(UI.getOpcode() == Opcode &&
1542 "opcode of underlying cast doesn't match");
1543 }
1544
1546 : VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
1547 ResultTy(ResultTy) {}
1548
1549 ~VPWidenCastRecipe() override = default;
1550
1552 if (auto *UV = getUnderlyingValue())
1553 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy,
1554 *cast<CastInst>(UV));
1555
1556 return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
1557 }
1558
1559 VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
1560
1561 /// Produce widened copies of the cast.
1562 void execute(VPTransformState &State) override;
1563
1564 /// Return the cost of this VPWidenCastRecipe.
1566 VPCostContext &Ctx) const override;
1567
1568#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1569 /// Print the recipe.
1570 void print(raw_ostream &O, const Twine &Indent,
1571 VPSlotTracker &SlotTracker) const override;
1572#endif
1573
1574 Instruction::CastOps getOpcode() const { return Opcode; }
1575
1576 /// Returns the result type of the cast.
1577 Type *getResultType() const { return ResultTy; }
1578};
1579
1580/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
1582 Instruction::CastOps Opcode;
1583
1584 Type *ResultTy;
1585
1586 Value *generate(VPTransformState &State);
1587
1588public:
1590 : VPSingleDefRecipe(VPDef::VPScalarCastSC, {Op}), Opcode(Opcode),
1591 ResultTy(ResultTy) {}
1592
1593 ~VPScalarCastRecipe() override = default;
1594
1596 return new VPScalarCastRecipe(Opcode, getOperand(0), ResultTy);
1597 }
1598
1599 VP_CLASSOF_IMPL(VPDef::VPScalarCastSC)
1600
1601 void execute(VPTransformState &State) override;
1602
1603 /// Return the cost of this VPScalarCastRecipe.
1605 VPCostContext &Ctx) const override {
1606 // TODO: Compute accurate cost after retiring the legacy cost model.
1607 return 0;
1608 }
1609
1610#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1611 void print(raw_ostream &O, const Twine &Indent,
1612 VPSlotTracker &SlotTracker) const override;
1613#endif
1614
1615 /// Returns the result type of the cast.
1616 Type *getResultType() const { return ResultTy; }
1617
1618 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1619 // At the moment, only uniform codegen is implemented.
1621 "Op must be an operand of the recipe");
1622 return true;
1623 }
1624};
1625
1626/// A recipe for widening vector intrinsics.
1628 /// ID of the vector intrinsic to widen.
1629 Intrinsic::ID VectorIntrinsicID;
1630
1631 /// Scalar return type of the intrinsic.
1632 Type *ResultTy;
1633
1634 /// True if the intrinsic may read from memory.
1635 bool MayReadFromMemory;
1636
1637 /// True if the intrinsic may write to memory.
1638 bool MayWriteToMemory;
1639
1640 /// True if the intrinsic may have side-effects.
1641 bool MayHaveSideEffects;
1642
1643public:
1645 ArrayRef<VPValue *> CallArguments, Type *Ty,
1646 DebugLoc DL = {})
1647 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
1648 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1649 MayReadFromMemory(CI.mayReadFromMemory()),
1650 MayWriteToMemory(CI.mayWriteToMemory()),
1651 MayHaveSideEffects(CI.mayHaveSideEffects()) {}
1652
1654 ArrayRef<VPValue *> CallArguments, Type *Ty,
1655 DebugLoc DL = {})
1656 : VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, DL),
1657 VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {
// No call instruction is passed to this constructor, so the memory behaviour
// is derived from the attributes declared for the intrinsic itself. DL is
// forwarded to the base class so the debug location is not dropped.
1658 LLVMContext &Ctx = Ty->getContext();
1659 AttributeList Attrs = Intrinsic::getAttributes(Ctx, VectorIntrinsicID);
1660 MemoryEffects ME = Attrs.getMemoryEffects();
// onlyWritesMemory() means "never reads" and onlyReadsMemory() means "never
// writes", so each "may" flag is the negation of the opposite query.
1661 MayReadFromMemory = !ME.onlyWritesMemory();
1662 MayWriteToMemory = !ME.onlyReadsMemory();
// Side effects are assumed if the intrinsic may write memory, or is not
// marked both nounwind and willreturn.
1663 MayHaveSideEffects = MayWriteToMemory ||
1664 !Attrs.hasFnAttr(Attribute::NoUnwind) ||
1665 !Attrs.hasFnAttr(Attribute::WillReturn);
1666 }
1667
1669 std::initializer_list<VPValue *> CallArguments,
1670 Type *Ty, DebugLoc DL = {})
1671 : VPWidenIntrinsicRecipe(VectorIntrinsicID,
1672 ArrayRef<VPValue *>(CallArguments), Ty, DL) {}
1673
1674 ~VPWidenIntrinsicRecipe() override = default;
1675
1677 return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
1678 VectorIntrinsicID, {op_begin(), op_end()},
1679 ResultTy, getDebugLoc());
1680 }
1681
1682 VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)
1683
1684 /// Produce a widened version of the vector intrinsic.
1685 void execute(VPTransformState &State) override;
1686
1687 /// Return the cost of this vector intrinsic.
1689 VPCostContext &Ctx) const override;
1690
1691 /// Return the scalar return type of the intrinsic.
1692 Type *getResultType() const { return ResultTy; }
1693
1694 /// Return the name of the intrinsic as a string.
1696
1697 /// Returns true if the intrinsic may read from memory.
1698 bool mayReadFromMemory() const { return MayReadFromMemory; }
1699
1700 /// Returns true if the intrinsic may write to memory.
1701 bool mayWriteToMemory() const { return MayWriteToMemory; }
1702
1703 /// Returns true if the intrinsic may have side-effects.
1704 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
1705
1706#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1707 /// Print the recipe.
1708 void print(raw_ostream &O, const Twine &Indent,
1709 VPSlotTracker &SlotTracker) const override;
1710#endif
1711
1712 bool onlyFirstLaneUsed(const VPValue *Op) const override;
1713};
1714
1715/// A recipe for widening Call instructions using library calls.
1717 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
1718 /// between a given VF and the chosen vectorized variant, so there will be a
1719 /// different VPlan for each VF with a valid variant.
1720 Function *Variant;
1721
1722public:
1724 ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1725 : VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1726 *cast<Instruction>(UV)),
1727 Variant(Variant) {
1728 assert(
1729 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
1730 "last operand must be the called function");
1731 }
1732
1733 ~VPWidenCallRecipe() override = default;
1734
1736 return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1737 {op_begin(), op_end()}, getDebugLoc());
1738 }
1739
1740 VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
1741
1742 /// Produce a widened version of the call instruction.
1743 void execute(VPTransformState &State) override;
1744
1745 /// Return the cost of this VPWidenCallRecipe.
1747 VPCostContext &Ctx) const override;
1748
1750 return cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
1751 }
1752
1754 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1755 }
1757 return make_range(op_begin(), op_begin() + getNumOperands() - 1);
1758 }
1759
1760#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1761 /// Print the recipe.
1762 void print(raw_ostream &O, const Twine &Indent,
1763 VPSlotTracker &SlotTracker) const override;
1764#endif
1765};
1766
1767/// A recipe representing a sequence of load -> update -> store as part of
1768/// a histogram operation. This means there may be aliasing between vector
1769/// lanes, which is handled by the llvm.experimental.vector.histogram family
1770/// of intrinsics. The only update operations currently supported are
1771/// 'add' and 'sub' where the other term is loop-invariant.
1773 /// Opcode of the update operation, currently either add or sub.
1774 unsigned Opcode;
1775
1776public:
/// Create a histogram recipe whose update operation has opcode \p Opcode.
/// \p Operands and \p DL are forwarded to the VPRecipeBase base.
/// NOTE(review): getMask() treats a third operand as the mask, so presumably
/// \p Operands holds two or three values; confirm against callers.
1777 template <typename IterT>
1778 VPHistogramRecipe(unsigned Opcode, iterator_range<IterT> Operands,
1779 DebugLoc DL = {})
1780 : VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Opcode(Opcode) {}
1781
1782 ~VPHistogramRecipe() override = default;
1783
1785 return new VPHistogramRecipe(Opcode, operands(), getDebugLoc());
1786 }
1787
1788 VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
1789
1790 /// Produce a vectorized histogram operation.
1791 void execute(VPTransformState &State) override;
1792
1793 /// Return the cost of this VPHistogramRecipe.
1795 VPCostContext &Ctx) const override;
1796
1797 unsigned getOpcode() const { return Opcode; }
1798
1799 /// Return the mask operand if one was provided, or a null pointer if all
1800 /// lanes should be executed unconditionally.
1801 VPValue *getMask() const {
1802 return getNumOperands() == 3 ? getOperand(2) : nullptr;
1803 }
1804
1805#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1806 /// Print the recipe
1807 void print(raw_ostream &O, const Twine &Indent,
1808 VPSlotTracker &SlotTracker) const override;
1809#endif
1810};
1811
1812/// A recipe for widening select instructions.
1814 template <typename IterT>
1816 : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
1817 I.getDebugLoc()) {}
1818
1819 ~VPWidenSelectRecipe() override = default;
1820
1822 return new VPWidenSelectRecipe(*cast<SelectInst>(getUnderlyingInstr()),
1823 operands());
1824 }
1825
1826 VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
1827
1828 /// Produce a widened version of the select instruction.
1829 void execute(VPTransformState &State) override;
1830
1831 /// Return the cost of this VPWidenSelectRecipe.
1833 VPCostContext &Ctx) const override;
1834
1835#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1836 /// Print the recipe.
1837 void print(raw_ostream &O, const Twine &Indent,
1838 VPSlotTracker &SlotTracker) const override;
1839#endif
1840
1841 VPValue *getCond() const {
1842 return getOperand(0);
1843 }
1844
1845 bool isInvariantCond() const {
1847 }
1848};
1849
1850/// A recipe for handling GEP instructions.
1852 bool isPointerLoopInvariant() const {
1854 }
1855
1856 bool isIndexLoopInvariant(unsigned I) const {
1858 }
1859
/// Returns true if every operand of this recipe reports
/// isDefinedOutsideLoopRegions(); this is also true when there are no
/// operands.
1860 bool areAllOperandsInvariant() const {
1861 return all_of(operands(), [](VPValue *Op) {
1862 return Op->isDefinedOutsideLoopRegions();
1863 });
1864 }
1865
1866public:
1867 template <typename IterT>
1869 : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP) {}
1870
1871 ~VPWidenGEPRecipe() override = default;
1872
1874 return new VPWidenGEPRecipe(cast<GetElementPtrInst>(getUnderlyingInstr()),
1875 operands());
1876 }
1877
1878 VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
1879
1880 /// Generate the gep nodes.
1881 void execute(VPTransformState &State) override;
1882
1883 /// Return the cost of this VPWidenGEPRecipe.
1885 VPCostContext &Ctx) const override {
1886 // TODO: Compute accurate cost after retiring the legacy cost model.
1887 return 0;
1888 }
1889
1890#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1891 /// Print the recipe.
1892 void print(raw_ostream &O, const Twine &Indent,
1893 VPSlotTracker &SlotTracker) const override;
1894#endif
1895};
1896
1897 /// A recipe to compute the pointers for widened memory accesses of IndexedTy
1898/// in reverse order.
1900 public VPUnrollPartAccessor<2> {
1901 Type *IndexedTy;
1902
1903public:
1906 : VPRecipeWithIRFlags(VPDef::VPReverseVectorPointerSC,
1907 ArrayRef<VPValue *>({Ptr, VF}), GEPFlags, DL),
1908 IndexedTy(IndexedTy) {}
1909
1910 VP_CLASSOF_IMPL(VPDef::VPReverseVectorPointerSC)
1911
1913 const VPValue *getVFValue() const { return getOperand(1); }
1914
1915 void execute(VPTransformState &State) override;
1916
1917 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1919 "Op must be an operand of the recipe");
1920 return true;
1921 }
1922
1923 /// Return the cost of this VPReverseVectorPointerRecipe.
1925 VPCostContext &Ctx) const override {
1926 // TODO: Compute accurate cost after retiring the legacy cost model.
1927 return 0;
1928 }
1929
1930 /// Returns true if the recipe only uses the first part of operand \p Op.
1931 bool onlyFirstPartUsed(const VPValue *Op) const override {
1933 "Op must be an operand of the recipe");
1934 assert(getNumOperands() <= 2 && "must have at most two operands");
1935 return true;
1936 }
1937
1940 IndexedTy, getGEPNoWrapFlags(),
1941 getDebugLoc());
1942 }
1943
1944#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1945 /// Print the recipe.
1946 void print(raw_ostream &O, const Twine &Indent,
1947 VPSlotTracker &SlotTracker) const override;
1948#endif
1949};
1950
1951 /// A recipe to compute the pointers for widened memory accesses of IndexedTy.
1953 public VPUnrollPartAccessor<1> {
1954 Type *IndexedTy;
1955
1956public:
1958 DebugLoc DL)
1959 : VPRecipeWithIRFlags(VPDef::VPVectorPointerSC, ArrayRef<VPValue *>(Ptr),
1960 GEPFlags, DL),
1961 IndexedTy(IndexedTy) {}
1962
1963 VP_CLASSOF_IMPL(VPDef::VPVectorPointerSC)
1964
1965 void execute(VPTransformState &State) override;
1966
1967 bool onlyFirstLaneUsed(const VPValue *Op) const override {
1969 "Op must be an operand of the recipe");
1970 return true;
1971 }
1972
1973 /// Returns true if the recipe only uses the first part of operand \p Op.
1974 bool onlyFirstPartUsed(const VPValue *Op) const override {
1976 "Op must be an operand of the recipe");
1977 assert(getNumOperands() <= 2 && "must have at most two operands");
1978 return true;
1979 }
1980
1982 return new VPVectorPointerRecipe(getOperand(0), IndexedTy,
1984 }
1985
1986 /// Return the cost of this VPVectorPointerRecipe.
1988 VPCostContext &Ctx) const override {
1989 // TODO: Compute accurate cost after retiring the legacy cost model.
1990 return 0;
1991 }
1992
1993#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1994 /// Print the recipe.
1995 void print(raw_ostream &O, const Twine &Indent,
1996 VPSlotTracker &SlotTracker) const override;
1997#endif
1998};
1999
2000/// A pure virtual base class for all recipes modeling header phis, including
2001/// phis for first order recurrences, pointer inductions and reductions. The
2002/// start value is the first operand of the recipe and the incoming value from
2003/// the backedge is the second operand.
2004///
2005/// Inductions are modeled using the following sub-classes:
2006/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
2007/// starting at a specified value (zero for the main vector loop, the resume
2008/// value for the epilogue vector loop) and stepping by 1. The induction
2009/// controls exiting of the vector loop by comparing against the vector trip
2010/// count. Produces a single scalar PHI for the induction value per
2011/// iteration.
2012/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2013/// floating point inductions with arbitrary start and step values. Produces
2014/// a vector PHI per-part.
2015/// * VPDerivedIVRecipe: Converts the canonical IV value to the corresponding
2016/// value of an IV with different start and step values. Produces a single
2017/// scalar value per iteration
2018/// * VPScalarIVStepsRecipe: Generates scalar values per-lane based on a
2019/// canonical or derived induction.
2020/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2021/// pointer induction. Produces either a vector PHI per-part or scalar values
2022/// per-lane based on the canonical induction.
2024protected:
/// Create a header phi recipe with recipe ID \p VPDefID for the underlying
/// instruction \p UnderlyingInstr and debug location \p DL. The base recipe
/// is created with an empty operand list; \p Start, when non-null, is then
/// added as the first operand.
2025 VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
2026 VPValue *Start = nullptr, DebugLoc DL = {})
2027 : VPSingleDefRecipe(VPDefID, ArrayRef<VPValue *>(), UnderlyingInstr, DL) {
// A null start value leaves the recipe without operands.
2028 if (Start)
2029 addOperand(Start);
2030 }
2031
2032public:
2033 ~VPHeaderPHIRecipe() override = default;
2034
2035 /// Method to support type inquiry through isa, cast, and dyn_cast.
2036 static inline bool classof(const VPRecipeBase *B) {
2037 return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
2038 B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
2039 }
2040 static inline bool classof(const VPValue *V) {
2041 auto *B = V->getDefiningRecipe();
2042 return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2043 B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
2044 }
2045
2046 /// Generate the phi nodes.
2047 void execute(VPTransformState &State) override = 0;
2048
2049 /// Return the cost of this header phi recipe.
2051 VPCostContext &Ctx) const override;
2052
2053#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2054 /// Print the recipe.
2055 void print(raw_ostream &O, const Twine &Indent,
2056 VPSlotTracker &SlotTracker) const override = 0;
2057#endif
2058
2059 /// Returns the start value of the phi, if one is set.
2061 return getNumOperands() == 0 ? nullptr : getOperand(0);
2062 }
2064 return getNumOperands() == 0 ? nullptr : getOperand(0);
2065 }
2066
2067 /// Update the start value of the recipe.
2069
2070 /// Returns the incoming value from the loop backedge.
2072 return getOperand(1);
2073 }
2074
2075 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2076 /// to be a recipe.
2079 }
2080};
2081
2082/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2083/// VPWidenPointerInductionRecipe), providing shared functionality, including
2084/// retrieving the step value, induction descriptor and original phi node.
2086 const InductionDescriptor &IndDesc;
2087
2088public:
/// Create a widened induction recipe of kind \p Kind for the phi \p IV.
/// \p Start and \p DL are forwarded to the VPHeaderPHIRecipe constructor,
/// \p Step is appended as an operand afterwards, and \p IndDesc is kept by
/// reference rather than copied.
/// NOTE(review): getStepValue() reads operand 1, so \p Start is presumably
/// expected to be non-null here -- confirm against callers.
2089 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2090 VPValue *Step, const InductionDescriptor &IndDesc,
2091 DebugLoc DL)
2092 : VPHeaderPHIRecipe(Kind, IV, Start, DL), IndDesc(IndDesc) {
2093 addOperand(Step);
2094 }
2095
2096 static inline bool classof(const VPRecipeBase *R) {
2097 return R->getVPDefID() == VPDef::VPWidenIntOrFpInductionSC ||
2098 R->getVPDefID() == VPDef::VPWidenPointerInductionSC;
2099 }
2100
2101 virtual void execute(VPTransformState &State) override = 0;
2102
2103 /// Returns the step value of the induction.
2105 const VPValue *getStepValue() const { return getOperand(1); }
2106
2107 PHINode *getPHINode() const { return cast<PHINode>(getUnderlyingValue()); }
2108
2109 /// Returns the induction descriptor for the recipe.
2110 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2111
2113 // TODO: All operands of base recipe must exist and be at same index in
2114 // derived recipe.
2116 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2117 }
2118
2120 // TODO: All operands of base recipe must exist and be at same index in
2121 // derived recipe.
2123 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2124 }
2125};
2126
2127/// A recipe for handling phi nodes of integer and floating-point inductions,
2128/// producing their vector values.
2130 TruncInst *Trunc;
2131
2132public:
2134 VPValue *VF, const InductionDescriptor &IndDesc,
2135 DebugLoc DL)
2136 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2137 Step, IndDesc, DL),
2138 Trunc(nullptr) {
2139 addOperand(VF);
2140 }
2141
2143 VPValue *VF, const InductionDescriptor &IndDesc,
2144 TruncInst *Trunc, DebugLoc DL)
2145 : VPWidenInductionRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start,
2146 Step, IndDesc, DL),
2147 Trunc(Trunc) {
2148 addOperand(VF);
2149 }
2150
2152
2157 }
2158
2159 VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
2160
2161 /// Generate the vectorized and scalarized versions of the phi node as
2162 /// needed by their users.
2163 void execute(VPTransformState &State) override;
2164
2165#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2166 /// Print the recipe.
2167 void print(raw_ostream &O, const Twine &Indent,
2168 VPSlotTracker &SlotTracker) const override;
2169#endif
2170
2172 const VPValue *getVFValue() const { return getOperand(2); }
2173
2175 // If the recipe has been unrolled (5 operands), return the VPValue for the
2176 // induction increment.
2177 return getNumOperands() == 5 ? getOperand(3) : nullptr;
2178 }
2179
2180 /// Returns the TruncInst recorded for this induction, or nullptr if the
2181 /// recipe was created without one.
2182 TruncInst *getTruncInst() { return Trunc; }
2183 const TruncInst *getTruncInst() const { return Trunc; }
2184
2185 /// Returns true if the induction is canonical, i.e. starting at 0 and
2186 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2187 /// same type as the canonical induction.
2188 bool isCanonical() const;
2189
2190 /// Returns the scalar type of the induction.
2192 return Trunc ? Trunc->getType() : getPHINode()->getType();
2193 }
2194
2195 /// Returns the VPValue representing the value of this induction at
2196 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2197 /// take place.
2199 return getNumOperands() == 5 ? getOperand(4) : this;
2200 }
2201};
2202
2204 public VPUnrollPartAccessor<3> {
2205 bool IsScalarAfterVectorization;
2206
2207public:
2208 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2209 /// Start.
2211 const InductionDescriptor &IndDesc,
2212 bool IsScalarAfterVectorization, DebugLoc DL)
2213 : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start,
2214 Step, IndDesc, DL),
2215 IsScalarAfterVectorization(IsScalarAfterVectorization) {}
2216
2218
2221 cast<PHINode>(getUnderlyingInstr()), getOperand(0), getOperand(1),
2222 getInductionDescriptor(), IsScalarAfterVectorization, getDebugLoc());
2223 }
2224
2225 VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC)
2226
2227 /// Generate vector values for the pointer induction.
2228 void execute(VPTransformState &State) override;
2229
2230 /// Returns true if only scalar values will be generated.
2231 bool onlyScalarsGenerated(bool IsScalable);
2232
2233 /// Returns the VPValue representing the value of this induction at
2234 /// the first unrolled part, if it exists. Returns itself if unrolling did not
2235 /// take place.
2237 return getUnrollPart(*this) == 0 ? this : getOperand(2);
2238 }
2239
2240#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2241 /// Print the recipe.
2242 void print(raw_ostream &O, const Twine &Indent,
2243 VPSlotTracker &SlotTracker) const override;
2244#endif
2245};
2246
2247/// Recipe to generate a scalar PHI. Used to generate code for recipes that
2248/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
2249/// VPEVLBasedIVPHIRecipe.
2251 std::string Name;
2252
2253public:
/// Create a scalar header phi with start value \p Start and backedge value
/// \p BackedgeValue. No underlying instruction is attached (nullptr is passed
/// to the base class) and \p Name is copied into the recipe.
2254 VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
2255 StringRef Name)
2256 : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
2257 Name(Name.str()) {
// The backedge value is appended after whatever the base constructor added.
2258 addOperand(BackedgeValue);
2259 }
2260
2261 ~VPScalarPHIRecipe() override = default;
2262
2264 llvm_unreachable("cloning not implemented yet");
2265 }
2266
2267 VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
2268
2269 /// Generate the phi/select nodes.
2270 void execute(VPTransformState &State) override;
2271
2272 /// Returns true if the recipe only uses the first lane of operand \p Op.
2273 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2275 "Op must be an operand of the recipe");
2276 return true;
2277 }
2278
2279#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2280 /// Print the recipe.
2281 void print(raw_ostream &O, const Twine &Indent,
2282 VPSlotTracker &SlotTracker) const override;
2283#endif
2284};
2285
2286/// A recipe for handling phis that are widened in the vector loop.
2287/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
2288/// managed in the recipe directly.
2290 /// List of incoming blocks. Only used in the VPlan native path.
2291 SmallVector<VPBasicBlock *, 2> IncomingBlocks;
2292
2293public:
2294 /// Create a new VPWidenPHIRecipe for \p Phi with start value \p Start.
/// \p Start is optional: the recipe is created with an empty operand list and
/// \p Start is only added as an operand when it is non-null.
2295 VPWidenPHIRecipe(PHINode *Phi, VPValue *Start = nullptr)
2296 : VPSingleDefRecipe(VPDef::VPWidenPHISC, ArrayRef<VPValue *>(), Phi) {
2297 if (Start)
2298 addOperand(Start);
2299 }
2300
2302 llvm_unreachable("cloning not implemented yet");
2303 }
2304
2305 ~VPWidenPHIRecipe() override = default;
2306
2307 VP_CLASSOF_IMPL(VPDef::VPWidenPHISC)
2308
2309 /// Generate the phi/select nodes.
2310 void execute(VPTransformState &State) override;
2311
2312#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2313 /// Print the recipe.
2314 void print(raw_ostream &O, const Twine &Indent,
2315 VPSlotTracker &SlotTracker) const override;
2316#endif
2317
2318 /// Adds a pair (\p IncomingV, \p IncomingBlock) to the phi.
/// \p IncomingV is appended to the operand list and \p IncomingBlock to
/// IncomingBlocks, so each call grows both lists by one entry.
/// NOTE(review): the constructor can add a start operand without recording a
/// block; in that case operand indices and IncomingBlocks indices would be
/// offset by one -- confirm with callers.
2319 void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock) {
2320 addOperand(IncomingV);
2321 IncomingBlocks.push_back(IncomingBlock);
2322 }
2323
2324 /// Returns the \p I th incoming VPBasicBlock.
2325 VPBasicBlock *getIncomingBlock(unsigned I) { return IncomingBlocks[I]; }
2326
2327 /// Returns the \p I th incoming VPValue.
2328 VPValue *getIncomingValue(unsigned I) { return getOperand(I); }
2329};
2330
2331/// A recipe for handling first-order recurrence phis. The start value is the
2332/// first operand of the recipe and the incoming value from the backedge is the
2333/// second operand.
2336 : VPHeaderPHIRecipe(VPDef::VPFirstOrderRecurrencePHISC, Phi, &Start) {}
2337
2338 VP_CLASSOF_IMPL(VPDef::VPFirstOrderRecurrencePHISC)
2339
2341 return R->getVPDefID() == VPDef::VPFirstOrderRecurrencePHISC;
2342 }
2343
2346 cast<PHINode>(getUnderlyingInstr()), *getOperand(0));
2347 }
2348
2349 void execute(VPTransformState &State) override;
2350
2351 /// Return the cost of this first-order recurrence phi recipe.
2353 VPCostContext &Ctx) const override;
2354
2355#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2356 /// Print the recipe.
2357 void print(raw_ostream &O, const Twine &Indent,
2358 VPSlotTracker &SlotTracker) const override;
2359#endif
2360};
2361
2362/// A recipe for handling reduction phis. The start value is the first operand
2363/// of the recipe and the incoming value from the backedge is the second
2364/// operand.
2366 public VPUnrollPartAccessor<2> {
2367 /// Descriptor for the reduction.
2368 const RecurrenceDescriptor &RdxDesc;
2369
2370 /// The phi is part of an in-loop reduction.
2371 bool IsInLoop;
2372
2373 /// The phi is part of an ordered reduction. Requires IsInLoop to be true.
2374 bool IsOrdered;
2375
2376public:
2377 /// Create a new VPReductionPHIRecipe for the reduction \p Phi described by \p
2378 /// RdxDesc.
2380 VPValue &Start, bool IsInLoop = false,
2381 bool IsOrdered = false)
2382 : VPHeaderPHIRecipe(VPDef::VPReductionPHISC, Phi, &Start),
2383 RdxDesc(RdxDesc), IsInLoop(IsInLoop), IsOrdered(IsOrdered) {
2384 assert((!IsOrdered || IsInLoop) && "IsOrdered requires IsInLoop");
2385 }
2386
2387 ~VPReductionPHIRecipe() override = default;
2388
2390 auto *R =
2391 new VPReductionPHIRecipe(cast<PHINode>(getUnderlyingInstr()), RdxDesc,
2392 *getOperand(0), IsInLoop, IsOrdered);
2393 R->addOperand(getBackedgeValue());
2394 return R;
2395 }
2396
2397 VP_CLASSOF_IMPL(VPDef::VPReductionPHISC)
2398
2400 return R->getVPDefID() == VPDef::VPReductionPHISC;
2401 }
2402
2403 /// Generate the phi/select nodes.
2404 void execute(VPTransformState &State) override;
2405
2406#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2407 /// Print the recipe.
2408 void print(raw_ostream &O, const Twine &Indent,
2409 VPSlotTracker &SlotTracker) const override;
2410#endif
2411
2413 return RdxDesc;
2414 }
2415
2416 /// Returns true, if the phi is part of an ordered reduction.
2417 bool isOrdered() const { return IsOrdered; }
2418
2419 /// Returns true, if the phi is part of an in-loop reduction.
2420 bool isInLoop() const { return IsInLoop; }
2421};
2422
2423/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2424/// instructions.
2426public:
2427 /// The blend operation is a User of the incoming values and of their
2428 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2429 /// be omitted (implied by passing an odd number of operands) in which case
2430 /// all other incoming values are merged into it.
2432 : VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, Phi->getDebugLoc()) {
2433 assert(Operands.size() > 0 && "Expected at least one operand!");
2434 }
2435
2436 VPBlendRecipe *clone() override {
2438 return new VPBlendRecipe(cast<PHINode>(getUnderlyingValue()), Ops);
2439 }
2440
2441 VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2442
2443 /// A normalized blend is one that has an odd number of operands, whereby the
2444 /// first operand does not have an associated mask.
2445 bool isNormalized() const { return getNumOperands() % 2; }
2446
2447 /// Return the number of incoming values, taking into account when normalized
2448 /// the first incoming value will have no mask.
2449 unsigned getNumIncomingValues() const {
2450 return (getNumOperands() + isNormalized()) / 2;
2451 }
2452
2453 /// Return incoming value number \p Idx.
2454 VPValue *getIncomingValue(unsigned Idx) const {
2455 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2456 }
2457
2458 /// Return mask number \p Idx.
2459 VPValue *getMask(unsigned Idx) const {
2460 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2461 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2462 }
2463
2464 /// Generate the phi/select nodes.
2465 void execute(VPTransformState &State) override;
2466
2467 /// Return the cost of this VPBlendRecipe.
2469 VPCostContext &Ctx) const override;
2470
2471#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2472 /// Print the recipe.
2473 void print(raw_ostream &O, const Twine &Indent,
2474 VPSlotTracker &SlotTracker) const override;
2475#endif
2476
2477 /// Returns true if the recipe only uses the first lane of operand \p Op.
2478 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2480 "Op must be an operand of the recipe");
2481 // Recursing through Blend recipes only, must terminate at header phi's the
2482 // latest.
2483 return all_of(users(),
2484 [this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2485 }
2486};
2487
2488/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
2489/// or stores into one wide load/store and shuffles. The first operand of a
2490/// VPInterleave recipe is the address, followed by the stored values, followed
2491/// by an optional mask.
2494
2495 /// Indicates if the interleave group is in a conditional block and requires a
2496 /// mask.
2497 bool HasMask = false;
2498
2499 /// Indicates if gaps between members of the group need to be masked out or if
2500 /// unused gaps can be loaded speculatively.
2501 bool NeedsMaskForGaps = false;
2502
2503public:
2505 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
2506 bool NeedsMaskForGaps)
2507 : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
2508 NeedsMaskForGaps(NeedsMaskForGaps) {
2509 for (unsigned i = 0; i < IG->getFactor(); ++i)
2510 if (Instruction *I = IG->getMember(i)) {
2511 if (I->getType()->isVoidTy())
2512 continue;
2513 new VPValue(I, this);
2514 }
2515
2516 for (auto *SV : StoredValues)
2517 addOperand(SV);
2518 if (Mask) {
2519 HasMask = true;
2520 addOperand(Mask);
2521 }
2522 }
2523 ~VPInterleaveRecipe() override = default;
2524
2526 return new VPInterleaveRecipe(IG, getAddr(), getStoredValues(), getMask(),
2527 NeedsMaskForGaps);
2528 }
2529
2530 VP_CLASSOF_IMPL(VPDef::VPInterleaveSC)
2531
2532 /// Return the address accessed by this recipe.
2533 VPValue *getAddr() const {
2534 return getOperand(0); // Address is the 1st, mandatory operand.
2535 }
2536
2537 /// Return the mask used by this recipe. Note that a full mask is represented
2538 /// by a nullptr.
2539 VPValue *getMask() const {
2540 // Mask is optional; when present it is the last operand, after the address
// and any stored values.
2541 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
2542 }
2543
2544 /// Return the VPValues stored by this interleave group. If it is a load
2545 /// interleave group, return an empty ArrayRef.
2547 // The first operand is the address, followed by the stored values, followed
2548 // by an optional mask.
2551 }
2552
2553 /// Generate the wide load or store, and shuffles.
2554 void execute(VPTransformState &State) override;
2555
2556 /// Return the cost of this VPInterleaveRecipe.
2558 VPCostContext &Ctx) const override;
2559
2560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2561 /// Print the recipe.
2562 void print(raw_ostream &O, const Twine &Indent,
2563 VPSlotTracker &SlotTracker) const override;
2564#endif
2565
2567
2568 /// Returns the number of stored operands of this interleave group. Returns 0
2569 /// for load interleave groups.
2570 unsigned getNumStoreOperands() const {
2571 return getNumOperands() - (HasMask ? 2 : 1);
2572 }
2573
2574 /// The recipe only uses the first lane of the address.
2575 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2577 "Op must be an operand of the recipe");
2578 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
2579 }
2580
2581 Instruction *getInsertPos() const { return IG->getInsertPos(); }
2582};
2583
2584/// A recipe to represent inloop reduction operations, performing a reduction on
2585/// a vector operand into a scalar value, and adding the result to a chain.
2586/// The Operands are {ChainOp, VecOp, [Condition]}.
2588 /// The recurrence descriptor for the reduction in question.
2589 const RecurrenceDescriptor &RdxDesc;
2590 bool IsOrdered;
2591 /// Whether the reduction is conditional.
2592 bool IsConditional = false;
2593
2594protected:
2595 VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R,
2597 VPValue *CondOp, bool IsOrdered)
2598 : VPSingleDefRecipe(SC, Operands, I), RdxDesc(R), IsOrdered(IsOrdered) {
2599 if (CondOp) {
2600 IsConditional = true;
2601 addOperand(CondOp);
2602 }
2603 }
2604
2605public:
2607 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
2608 bool IsOrdered)
2609 : VPReductionRecipe(VPDef::VPReductionSC, R, I,
2610 ArrayRef<VPValue *>({ChainOp, VecOp}), CondOp,
2611 IsOrdered) {}
2612
2613 ~VPReductionRecipe() override = default;
2614
2616 return new VPReductionRecipe(RdxDesc, getUnderlyingInstr(), getChainOp(),
2617 getVecOp(), getCondOp(), IsOrdered);
2618 }
2619
2620 static inline bool classof(const VPRecipeBase *R) {
2621 return R->getVPDefID() == VPRecipeBase::VPReductionSC ||
2622 R->getVPDefID() == VPRecipeBase::VPReductionEVLSC;
2623 }
2624
2625 static inline bool classof(const VPUser *U) {
2626 auto *R = dyn_cast<VPRecipeBase>(U);
2627 return R && classof(R);
2628 }
2629
2630 /// Generate the reduction in the loop
2631 void execute(VPTransformState &State) override;
2632
2633 /// Return the cost of VPReductionRecipe.
2635 VPCostContext &Ctx) const override;
2636
2637#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2638 /// Print the recipe.
2639 void print(raw_ostream &O, const Twine &Indent,
2640 VPSlotTracker &SlotTracker) const override;
2641#endif
2642
2643 /// Return the recurrence descriptor for the in-loop reduction.
2645 return RdxDesc;
2646 }
2647 /// Return true if the in-loop reduction is ordered.
2648 bool isOrdered() const { return IsOrdered; };
2649 /// Return true if the in-loop reduction is conditional.
2650 bool isConditional() const { return IsConditional; };
2651 /// The VPValue of the scalar Chain being accumulated.
2652 VPValue *getChainOp() const { return getOperand(0); }
2653 /// The VPValue of the vector value to be reduced.
2654 VPValue *getVecOp() const { return getOperand(1); }
2655 /// The VPValue of the condition for the block.
2657 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
2658 }
2659};
2660
2661/// A recipe to represent inloop reduction operations with vector-predication
2662/// intrinsics, performing a reduction on a vector operand with the explicit
2663/// vector length (EVL) into a scalar value, and adding the result to a chain.
2664/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
2666public:
2669 VPDef::VPReductionEVLSC, R.getRecurrenceDescriptor(),
2671 ArrayRef<VPValue *>({R.getChainOp(), R.getVecOp(), &EVL}), CondOp,
2672 R.isOrdered()) {}
2673
2674 ~VPReductionEVLRecipe() override = default;
2675
2677 llvm_unreachable("cloning not implemented yet");
2678 }
2679
2680 VP_CLASSOF_IMPL(VPDef::VPReductionEVLSC)
2681
2682 /// Generate the reduction in the loop
2683 void execute(VPTransformState &State) override;
2684
2685#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2686 /// Print the recipe.
2687 void print(raw_ostream &O, const Twine &Indent,
2688 VPSlotTracker &SlotTracker) const override;
2689#endif
2690
2691 /// The VPValue of the explicit vector length.
2692 VPValue *getEVL() const { return getOperand(2); }
2693
2694 /// Returns true if the recipe only uses the first lane of operand \p Op.
2695 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2697 "Op must be an operand of the recipe");
2698 return Op == getEVL();
2699 }
2700};
2701
2702/// VPReplicateRecipe replicates a given instruction producing multiple scalar
2703/// copies of the original scalar type, one per lane, instead of producing a
2704/// single copy of widened type for all lanes. If the instruction is known to be
2705/// uniform only one copy, per lane zero, will be generated.
2707 /// Indicator if only a single replica per lane is needed.
2708 bool IsUniform;
2709
2710 /// Indicator if the replicas are also predicated.
2711 bool IsPredicated;
2712
2713public:
2714 template <typename IterT>
2716 bool IsUniform, VPValue *Mask = nullptr)
2717 : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
2718 IsUniform(IsUniform), IsPredicated(Mask) {
2719 if (Mask)
2720 addOperand(Mask);
2721 }
2722
2723 ~VPReplicateRecipe() override = default;
2724
2726 auto *Copy =
2727 new VPReplicateRecipe(getUnderlyingInstr(), operands(), IsUniform,
2728 isPredicated() ? getMask() : nullptr);
2729 Copy->transferFlags(*this);
2730 return Copy;
2731 }
2732
2733 VP_CLASSOF_IMPL(VPDef::VPReplicateSC)
2734
2735 /// Generate replicas of the desired Ingredient. Replicas will be generated
2736 /// for all parts and lanes unless a specific part and lane are specified in
2737 /// the \p State.
2738 void execute(VPTransformState &State) override;
2739
2740 /// Return the cost of this VPReplicateRecipe.
2742 VPCostContext &Ctx) const override;
2743
2744#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2745 /// Print the recipe.
2746 void print(raw_ostream &O, const Twine &Indent,
2747 VPSlotTracker &SlotTracker) const override;
2748#endif
2749
2750 bool isUniform() const { return IsUniform; }
2751
2752 bool isPredicated() const { return IsPredicated; }
2753
2754 /// Returns true if the recipe only uses the first lane of operand \p Op.
2755 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2757 "Op must be an operand of the recipe");
2758 return isUniform();
2759 }
2760
2761 /// Returns true if the recipe uses scalars of operand \p Op.
2762 bool usesScalars(const VPValue *Op) const override {
2764 "Op must be an operand of the recipe");
2765 return true;
2766 }
2767
2768 /// Returns true if the recipe is used by a widened recipe via an intervening
2769 /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
2770 /// in a vector.
2771 bool shouldPack() const;
2772
2773 /// Return the mask of a predicated VPReplicateRecipe.
2775 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
2776 return getOperand(getNumOperands() - 1);
2777 }
2778
2779 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
2780};
2781
2782/// A recipe for generating conditional branches on the bits of a mask.
2784public:
2786 : VPRecipeBase(VPDef::VPBranchOnMaskSC, {}) {
2787 if (BlockInMask) // nullptr means all-one mask.
2788 addOperand(BlockInMask);
2789 }
2790
2792 return new VPBranchOnMaskRecipe(getOperand(0));
2793 }
2794
2795 VP_CLASSOF_IMPL(VPDef::VPBranchOnMaskSC)
2796
2797 /// Generate the extraction of the appropriate bit from the block mask and the
2798 /// conditional branch.
2799 void execute(VPTransformState &State) override;
2800
2801 /// Return the cost of this VPBranchOnMaskRecipe.
2803 VPCostContext &Ctx) const override;
2804
2805#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2806 /// Print the recipe.
2807 void print(raw_ostream &O, const Twine &Indent,
2808 VPSlotTracker &SlotTracker) const override {
2809 O << Indent << "BRANCH-ON-MASK ";
2810 if (VPValue *Mask = getMask())
2811 Mask->printAsOperand(O, SlotTracker);
2812 else
2813 O << " All-One";
2814 }
2815#endif
2816
2817 /// Return the mask used by this recipe. Note that a full mask is represented
2818 /// by a nullptr.
2819 VPValue *getMask() const {
2820 assert(getNumOperands() <= 1 && "should have either 0 or 1 operands");
2821 // Mask is optional.
2822 return getNumOperands() == 1 ? getOperand(0) : nullptr;
2823 }
2824
2825 /// Returns true if the recipe uses scalars of operand \p Op.
2826 bool usesScalars(const VPValue *Op) const override {
2828 "Op must be an operand of the recipe");
2829 return true;
2830 }
2831};
2832
2833/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
2834/// control converges back from a Branch-on-Mask. The phi nodes are needed in
2835/// order to merge values that are set under such a branch and feed their uses.
2836/// The phi nodes can be scalar or vector depending on the users of the value.
2837/// This recipe works in concert with VPBranchOnMaskRecipe.
2839public:
2840 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
2841 /// nodes after merging back from a Branch-on-Mask.
2843 : VPSingleDefRecipe(VPDef::VPPredInstPHISC, PredV) {}
2844 ~VPPredInstPHIRecipe() override = default;
2845
2847 return new VPPredInstPHIRecipe(getOperand(0));
2848 }
2849
2850 VP_CLASSOF_IMPL(VPDef::VPPredInstPHISC)
2851
2852 /// Generates phi nodes for live-outs (from a replicate region) as needed to
2853 /// retain SSA form.
2854 void execute(VPTransformState &State) override;
2855
2856 /// Return the cost of this VPPredInstPHIRecipe.
2858 VPCostContext &Ctx) const override {
2859 // TODO: Compute accurate cost after retiring the legacy cost model.
2860 return 0;
2861 }
2862
2863#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2864 /// Print the recipe.
2865 void print(raw_ostream &O, const Twine &Indent,
2866 VPSlotTracker &SlotTracker) const override;
2867#endif
2868
2869 /// Returns true if the recipe uses scalars of operand \p Op.
2870 bool usesScalars(const VPValue *Op) const override {
2872 "Op must be an operand of the recipe");
2873 return true;
2874 }
2875};
2876
2877/// A common base class for widening memory operations. An optional mask can be
2878/// provided as the last operand.
2880protected:
2882
2883 /// Whether the accessed addresses are consecutive.
2885
2886 /// Whether the consecutive accessed addresses are in reverse order.
2888
2889 /// Whether the memory access is masked.
2890 bool IsMasked = false;
2891
2892 void setMask(VPValue *Mask) {
2893 assert(!IsMasked && "cannot re-set mask");
2894 if (!Mask)
2895 return;
2896 addOperand(Mask);
2897 IsMasked = true;
2898 }
2899
2900 VPWidenMemoryRecipe(const char unsigned SC, Instruction &I,
2901 std::initializer_list<VPValue *> Operands,
2902 bool Consecutive, bool Reverse, DebugLoc DL)
2904 Reverse(Reverse) {
2905 assert((Consecutive || !Reverse) && "Reverse implies consecutive");
2906 }
2907
2908public:
2910 llvm_unreachable("cloning not supported");
2911 }
2912
2913 static inline bool classof(const VPRecipeBase *R) {
2914 return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
2915 R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
2916 R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
2917 R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
2918 }
2919
2920 static inline bool classof(const VPUser *U) {
2921 auto *R = dyn_cast<VPRecipeBase>(U);
2922 return R && classof(R);
2923 }
2924
2925 /// Return whether the loaded-from / stored-to addresses are consecutive.
2926 bool isConsecutive() const { return Consecutive; }
2927
2928 /// Return whether the consecutive loaded/stored addresses are in reverse
2929 /// order.
2930 bool isReverse() const { return Reverse; }
2931
2932 /// Return the address accessed by this recipe.
2933 VPValue *getAddr() const { return getOperand(0); }
2934
2935 /// Returns true if the recipe is masked.
2936 bool isMasked() const { return IsMasked; }
2937
2938 /// Return the mask used by this recipe. Note that a full mask is represented
2939 /// by a nullptr.
2940 VPValue *getMask() const {
2941 // Mask is optional and therefore the last operand.
2942 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
2943 }
2944
2945 /// Generate the wide load/store.
/// This base-class implementation is not meant to be reached: it
/// unconditionally hits llvm_unreachable.
2946 void execute(VPTransformState &State) override {
2947 llvm_unreachable("VPWidenMemoryRecipe should not be instantiated.");
2948 }
2949
2950 /// Return the cost of this VPWidenMemoryRecipe.
2952 VPCostContext &Ctx) const override;
2953
2955};
2956
2957/// A recipe for widening load operations, using the address to load from and an
2958/// optional mask.
2959struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
2961 bool Consecutive, bool Reverse, DebugLoc DL)
2962 : VPWidenMemoryRecipe(VPDef::VPWidenLoadSC, Load, {Addr}, Consecutive,
2963 Reverse, DL),
2964 VPValue(this, &Load) {
2965 setMask(Mask);
2966 }
2967
2969 return new VPWidenLoadRecipe(cast<LoadInst>(Ingredient), getAddr(),
2971 getDebugLoc());
2972 }
2973
2974 VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC);
2975
2976 /// Generate a wide load or gather.
2977 void execute(VPTransformState &State) override;
2978
2979#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2980 /// Print the recipe.
2981 void print(raw_ostream &O, const Twine &Indent,
2982 VPSlotTracker &SlotTracker) const override;
2983#endif
2984
2985 /// Returns true if the recipe only uses the first lane of operand \p Op.
2986 bool onlyFirstLaneUsed(const VPValue *Op) const override {
2988 "Op must be an operand of the recipe");
2989 // Widened, consecutive loads operations only demand the first lane of
2990 // their address.
2991 return Op == getAddr() && isConsecutive();
2992 }
2993};
2994
2995/// A recipe for widening load operations with vector-predication intrinsics,
2996/// using the address to load from, the explicit vector length and an optional
2997/// mask.
2998struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
3000 : VPWidenMemoryRecipe(VPDef::VPWidenLoadEVLSC, L.getIngredient(),
3001 {L.getAddr(), &EVL}, L.isConsecutive(),
3002 L.isReverse(), L.getDebugLoc()),
3003 VPValue(this, &getIngredient()) {
3004 setMask(Mask);
3005 }
3006
3007 VP_CLASSOF_IMPL(VPDef::VPWidenLoadEVLSC)
3008
3009 /// Return the EVL operand.
3010 VPValue *getEVL() const { return getOperand(1); }
3011
3012 /// Generate the wide load or gather.
3013 void execute(VPTransformState &State) override;
3014
3015 /// Return the cost of this VPWidenLoadEVLRecipe.
3017 VPCostContext &Ctx) const override;
3018
3019#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3020 /// Print the recipe.
3021 void print(raw_ostream &O, const Twine &Indent,
3022 VPSlotTracker &SlotTracker) const override;
3023#endif
3024
3025 /// Returns true if the recipe only uses the first lane of operand \p Op.
3026 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3028 "Op must be an operand of the recipe");
3029 // Widened loads only demand the first lane of EVL and consecutive loads
3030 // only demand the first lane of their address.
3031 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3032 }
3033};
3034
3035/// A recipe for widening store operations, using the stored value, the address
3036/// to store to and an optional mask.
3039 VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
3040 : VPWidenMemoryRecipe(VPDef::VPWidenStoreSC, Store, {Addr, StoredVal},
3042 setMask(Mask);
3043 }
3044
3046 return new VPWidenStoreRecipe(cast<StoreInst>(Ingredient), getAddr(),
3048 Reverse, getDebugLoc());
3049 }
3050
3051 VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC);
3052
3053 /// Return the value stored by this recipe.
3054 VPValue *getStoredValue() const { return getOperand(1); }
3055
3056 /// Generate a wide store or scatter.
3057 void execute(VPTransformState &State) override;
3058
3059#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3060 /// Print the recipe.
3061 void print(raw_ostream &O, const Twine &Indent,
3062 VPSlotTracker &SlotTracker) const override;
3063#endif
3064
3065 /// Returns true if the recipe only uses the first lane of operand \p Op.
3066 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3068 "Op must be an operand of the recipe");
3069 // Widened, consecutive stores only demand the first lane of their address,
3070 // unless the same operand is also stored.
3071 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3072 }
3073};
3074
3075/// A recipe for widening store operations with vector-predication intrinsics,
3076/// using the value to store, the address to store to, the explicit vector
3077/// length and an optional mask.
3080 : VPWidenMemoryRecipe(VPDef::VPWidenStoreEVLSC, S.getIngredient(),
3081 {S.getAddr(), S.getStoredValue(), &EVL},
3082 S.isConsecutive(), S.isReverse(), S.getDebugLoc()) {
3083 setMask(Mask);
3084 }
3085
3086 VP_CLASSOF_IMPL(VPDef::VPWidenStoreEVLSC)
3087
3088 /// Return the value stored by this recipe.
3089 VPValue *getStoredValue() const { return getOperand(1); }
3090
3091 /// Return the EVL operand.
3092 VPValue *getEVL() const { return getOperand(2); }
3093
3094 /// Generate the wide store or scatter.
3095 void execute(VPTransformState &State) override;
3096
3097 /// Return the cost of this VPWidenStoreEVLRecipe.
3099 VPCostContext &Ctx) const override;
3100
3101#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3102 /// Print the recipe.
3103 void print(raw_ostream &O, const Twine &Indent,
3104 VPSlotTracker &SlotTracker) const override;
3105#endif
3106
3107 /// Returns true if the recipe only uses the first lane of operand \p Op.
3108 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3110 "Op must be an operand of the recipe");
3111 if (Op == getEVL()) {
3112 assert(getStoredValue() != Op && "unexpected store of EVL");
3113 return true;
3114 }
3115 // Widened, consecutive memory operations only demand the first lane of
3116 // their address, unless the same operand is also stored. The latter can
3117 // happen with opaque pointers.
3118 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3119 }
3120};
3121
3122/// Recipe to expand a SCEV expression.
3124 const SCEV *Expr;
3125 ScalarEvolution &SE;
3126
3127public:
3129 : VPSingleDefRecipe(VPDef::VPExpandSCEVSC, {}), Expr(Expr), SE(SE) {}
3130
3131 ~VPExpandSCEVRecipe() override = default;
3132
3134 return new VPExpandSCEVRecipe(Expr, SE);
3135 }
3136
3137 VP_CLASSOF_IMPL(VPDef::VPExpandSCEVSC)
3138
3139 /// Generate the expansion of the SCEV expression held by this recipe.
3140 void execute(VPTransformState &State) override;
3141
3142 /// Return the cost of this VPExpandSCEVRecipe.
3144 VPCostContext &Ctx) const override {
3145 // TODO: Compute accurate cost after retiring the legacy cost model.
3146 return 0;
3147 }
3148
3149#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3150 /// Print the recipe.
3151 void print(raw_ostream &O, const Twine &Indent,
3152 VPSlotTracker &SlotTracker) const override;
3153#endif
3154
3155 const SCEV *getSCEV() const { return Expr; }
3156};
3157
3158/// Canonical scalar induction phi of the vector loop. Starting at the specified
3159/// start value (either 0 or the resume value when vectorizing the epilogue
3160/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
3161/// canonical induction variable.
3163public:
3165 : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
3166
3167 ~VPCanonicalIVPHIRecipe() override = default;
3168
3170 auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
3171 R->addOperand(getBackedgeValue());
3172 return R;
3173 }
3174
3175 VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
3176
3178 return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
3179 }
3180
3181 void execute(VPTransformState &State) override {
3183 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3184 }
3185
3186#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3187 /// Print the recipe.
3188 void print(raw_ostream &O, const Twine &Indent,
3189 VPSlotTracker &SlotTracker) const override;
3190#endif
3191
3192 /// Returns the scalar type of the induction.
3194 return getStartValue()->getLiveInIRValue()->getType();
3195 }
3196
3197 /// Returns true if the recipe only uses the first lane of operand \p Op.
3198 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3200 "Op must be an operand of the recipe");
3201 return true;
3202 }
3203
3204 /// Returns true if the recipe only uses the first part of operand \p Op.
3205 bool onlyFirstPartUsed(const VPValue *Op) const override {
3207 "Op must be an operand of the recipe");
3208 return true;
3209 }
3210
3211 /// Check if the induction described by \p Kind, \p Start and \p Step is
3212 /// canonical, i.e. has the same start and step (of 1) as the canonical IV.
3214 VPValue *Step) const;
3215
3216 /// Return the cost of this VPCanonicalIVPHIRecipe.
3218 VPCostContext &Ctx) const override {
3219 // For now, match the behavior of the legacy cost model.
3220 return 0;
3221 }
3222};
3223
3224/// A recipe for generating the active lane mask for the vector loop that is
3225/// used to predicate the vector operations.
3226/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
3227/// remove VPActiveLaneMaskPHIRecipe.
3229public:
3231 : VPHeaderPHIRecipe(VPDef::VPActiveLaneMaskPHISC, nullptr, StartMask,
3232 DL) {}
3233
3234 ~VPActiveLaneMaskPHIRecipe() override = default;
3235
3238 if (getNumOperands() == 2)
3239 R->addOperand(getOperand(1));
3240 return R;
3241 }
3242
3243 VP_CLASSOF_IMPL(VPDef::VPActiveLaneMaskPHISC)
3244
3246 return D->getVPDefID() == VPDef::VPActiveLaneMaskPHISC;
3247 }
3248
3249 /// Generate the active lane mask phi of the vector loop.
3250 void execute(VPTransformState &State) override;
3251
3252#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3253 /// Print the recipe.
3254 void print(raw_ostream &O, const Twine &Indent,
3255 VPSlotTracker &SlotTracker) const override;
3256#endif
3257};
3258
3259/// A recipe for generating the phi node for the current index of elements,
3260/// adjusted in accordance with EVL value. It starts at the start value of the
3261/// canonical induction and gets incremented by EVL in each iteration of the
3262/// vector loop.
3264public:
3266 : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {}
3267
3268 ~VPEVLBasedIVPHIRecipe() override = default;
3269
3271 llvm_unreachable("cloning not implemented yet");
3272 }
3273
3274 VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC)
3275
3277 return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
3278 }
3279
3280 void execute(VPTransformState &State) override {
3282 "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
3283 }
3284
3285 /// Return the cost of this VPEVLBasedIVPHIRecipe.
3287 VPCostContext &Ctx) const override {
3288 // For now, match the behavior of the legacy cost model.
3289 return 0;
3290 }
3291
3292 /// Returns true if the recipe only uses the first lane of operand \p Op.
3293 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3295 "Op must be an operand of the recipe");
3296 return true;
3297 }
3298
3299#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3300 /// Print the recipe.
3301 void print(raw_ostream &O, const Twine &Indent,
3302 VPSlotTracker &SlotTracker) const override;
3303#endif
3304};
3305
3306/// A Recipe for widening the canonical induction variable of the vector loop.
3308 public VPUnrollPartAccessor<1> {
3309public:
3311 : VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
3312
3313 ~VPWidenCanonicalIVRecipe() override = default;
3314
3316 return new VPWidenCanonicalIVRecipe(
3317 cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
3318 }
3319
3320 VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
3321
3322 /// Generate a canonical vector induction variable of the vector loop, with
3323 /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
3324 /// step = <VF*UF, VF*UF, ..., VF*UF>.
3325 void execute(VPTransformState &State) override;
3326
3327 /// Return the cost of this VPWidenCanonicalIVRecipe.
3329 VPCostContext &Ctx) const override {
3330 // TODO: Compute accurate cost after retiring the legacy cost model.
3331 return 0;
3332 }
3333
3334#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3335 /// Print the recipe.
3336 void print(raw_ostream &O, const Twine &Indent,
3337 VPSlotTracker &SlotTracker) const override;
3338#endif
3339};
3340
3341 /// A recipe for converting the input value \p IV to the corresponding
3342 /// value of an IV with different start and step values, using Start + IV *
3343 /// Step.
3345 /// Kind of the induction.
3347 /// If not nullptr, the floating point induction binary operator. Must be set
3348 /// for floating point inductions.
3349 const FPMathOperator *FPBinOp;
3350
3351 /// Name to use for the generated IR instruction for the derived IV.
3352 std::string Name;
3353
3354public:
3356 VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
3357 const Twine &Name = "")
3359 IndDesc.getKind(),
3360 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
3361 Start, CanonicalIV, Step, Name) {}
3362
3364 const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV,
3365 VPValue *Step, const Twine &Name = "")
3366 : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind),
3367 FPBinOp(FPBinOp), Name(Name.str()) {}
3368
3369 ~VPDerivedIVRecipe() override = default;
3370
3372 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
3373 getStepValue());
3374 }
3375
3376 VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC)
3377
3378 /// Generate the transformed value of the induction at offset StartValue (1.
3379 /// operand) + IV (2. operand) * StepValue (3. operand).
3380 void execute(VPTransformState &State) override;
3381
3382 /// Return the cost of this VPDerivedIVRecipe.
3384 VPCostContext &Ctx) const override {
3385 // TODO: Compute accurate cost after retiring the legacy cost model.
3386 return 0;
3387 }
3388
3389#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3390 /// Print the recipe.
3391 void print(raw_ostream &O, const Twine &Indent,
3392 VPSlotTracker &SlotTracker) const override;
3393#endif
3394
3396 return getStartValue()->getLiveInIRValue()->getType();
3397 }
3398
3399 VPValue *getStartValue() const { return getOperand(0); }
3400 VPValue *getStepValue() const { return getOperand(2); }
3401
3402 /// Returns true if the recipe only uses the first lane of operand \p Op.
3403 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3405 "Op must be an operand of the recipe");
3406 return true;
3407 }
3408};
3409
3410/// A recipe for handling phi nodes of integer and floating-point inductions,
3411/// producing their scalar values.
3413 public VPUnrollPartAccessor<2> {
3414 Instruction::BinaryOps InductionOpcode;
3415
3416public:
3419 : VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3420 ArrayRef<VPValue *>({IV, Step}), FMFs),
3421 InductionOpcode(Opcode) {}
3422
3424 VPValue *Step)
3426 IV, Step, IndDesc.getInductionOpcode(),
3427 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
3428 ? IndDesc.getInductionBinOp()->getFastMathFlags()
3429 : FastMathFlags()) {}
3430
3431 ~VPScalarIVStepsRecipe() override = default;
3432
3434 return new VPScalarIVStepsRecipe(
3435 getOperand(0), getOperand(1), InductionOpcode,
3437 }
3438
3439 VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
3440
3441 /// Generate the scalarized versions of the phi node as needed by their users.
3442 void execute(VPTransformState &State) override;
3443
3444 /// Return the cost of this VPScalarIVStepsRecipe.
3446 VPCostContext &Ctx) const override {
3447 // TODO: Compute accurate cost after retiring the legacy cost model.
3448 return 0;
3449 }
3450
3451#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3452 /// Print the recipe.
3453 void print(raw_ostream &O, const Twine &Indent,
3454 VPSlotTracker &SlotTracker) const override;
3455#endif
3456
3457 VPValue *getStepValue() const { return getOperand(1); }
3458
3459 /// Returns true if the recipe only uses the first lane of operand \p Op.
3460 bool onlyFirstLaneUsed(const VPValue *Op) const override {
3462 "Op must be an operand of the recipe");
3463 return true;
3464 }
3465};
3466
3467/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
3468/// holds a sequence of zero or more VPRecipe's each representing a sequence of
3469/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
3471public:
3473
3474protected:
3475 /// The VPRecipes held in the order of output instructions to generate.
3477
3478 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
3479 : VPBlockBase(BlockSC, Name.str()) {}
3480
3481public:
3482 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
3483 : VPBlockBase(VPBasicBlockSC, Name.str()) {
3484 if (Recipe)
3485 appendRecipe(Recipe);
3486 }
3487
3488 ~VPBasicBlock() override {
3489 while (!Recipes.empty())
3490 Recipes.pop_back();
3491 }
3492
3493 /// Instruction iterators...
3498
3499 //===--------------------------------------------------------------------===//
3500 /// Recipe iterator methods
3501 ///
3502 inline iterator begin() { return Recipes.begin(); }
3503 inline const_iterator begin() const { return Recipes.begin(); }
3504 inline iterator end() { return Recipes.end(); }
3505 inline const_iterator end() const { return Recipes.end(); }
3506
3507 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
3508 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
3509 inline reverse_iterator rend() { return Recipes.rend(); }
3510 inline const_reverse_iterator rend() const { return Recipes.rend(); }
3511
3512 inline size_t size() const { return Recipes.size(); }
3513 inline bool empty() const { return Recipes.empty(); }
3514 inline const VPRecipeBase &front() const { return Recipes.front(); }
3515 inline VPRecipeBase &front() { return Recipes.front(); }
3516 inline const VPRecipeBase &back() const { return Recipes.back(); }
3517 inline VPRecipeBase &back() { return Recipes.back(); }
3518
3519 /// Returns a reference to the list of recipes.
3521
3522 /// Returns a pointer to a member of the recipe list.
3524 return &VPBasicBlock::Recipes;
3525 }
3526
3527 /// Method to support type inquiry through isa, cast, and dyn_cast.
3528 static inline bool classof(const VPBlockBase *V) {
3529 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
3530 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3531 }
3532
/// Insert \p Recipe into this block's recipe list at position \p InsertPt and
/// record this block as the recipe's parent. Asserts that \p Recipe is
/// non-null and does not have a parent yet.
3533 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
3534 assert(Recipe && "No recipe to append.");
3535 assert(!Recipe->Parent && "Recipe already in VPlan");
3536 Recipe->Parent = this;
3537 Recipes.insert(InsertPt, Recipe);
3538 }
3539
3540 /// Augment the existing recipes of a VPBasicBlock with an additional
3541 /// \p Recipe as the last recipe.
3542 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
3543
3544 /// The method which generates the output IR instructions that correspond to
3545 /// this VPBasicBlock, thereby "executing" the VPlan.
3546 void execute(VPTransformState *State) override;
3547
3548 /// Return the cost of this VPBasicBlock.
3550
3551 /// Return the position of the first non-phi node recipe in the block.
3553
3554 /// Returns an iterator range over the PHI-like recipes in the block.
3556 return make_range(begin(), getFirstNonPhi());
3557 }
3558
3559 void dropAllReferences(VPValue *NewValue) override;
3560
3561 /// Split current block at \p SplitAt by inserting a new block between the
3562 /// current block and its successors and moving all recipes starting at
3563 /// SplitAt to the new block. Returns the new block.
3564 VPBasicBlock *splitAt(iterator SplitAt);
3565
3568
3569#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3570 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
3571 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
3572 ///
3573 /// Note that the numbering is applied to the whole VPlan, so printing
3574 /// individual blocks is consistent with the whole VPlan printing.
3575 void print(raw_ostream &O, const Twine &Indent,
3576 VPSlotTracker &SlotTracker) const override;
3577 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3578#endif
3579
3580 /// If the block has multiple successors, return the branch recipe terminating
3581 /// the block. If there are no successors or only a single one, return nullptr.
3583 const VPRecipeBase *getTerminator() const;
3584
3585 /// Returns true if the block is exiting its parent region.
3586 bool isExiting() const;
3587
3588 /// Clone the current block and its recipes, without updating the operands of
3589 /// the cloned recipes.
3590 VPBasicBlock *clone() override {
3591 auto *NewBlock = new VPBasicBlock(getName());
3592 for (VPRecipeBase &R : *this)
3593 NewBlock->appendRecipe(R.clone());
3594 return NewBlock;
3595 }
3596
3597protected:
3598 /// Execute the recipes in the IR basic block \p BB.
3599 void executeRecipes(VPTransformState *State, BasicBlock *BB);
3600
3601 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
3602 /// generated for this VPBB.
3604
3605private:
3606 /// Create an IR BasicBlock to hold the output instructions generated by this
3607 /// VPBasicBlock, and return it. Update the CFGState accordingly.
3608 BasicBlock *createEmptyBasicBlock(VPTransformState::CFGState &CFG);
3609};
3610
3611/// A special type of VPBasicBlock that wraps an existing IR basic block.
3612/// Recipes of the block get added before the first non-phi instruction in the
3613/// wrapped block.
3614/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
3615/// preheader block.
3617 BasicBlock *IRBB;
3618
3619public:
3621 : VPBasicBlock(VPIRBasicBlockSC,
3622 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
3623 IRBB(IRBB) {}
3624
3625 ~VPIRBasicBlock() override {}
3626
3627 static inline bool classof(const VPBlockBase *V) {
3628 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
3629 }
3630
3631 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
3632 /// instructions in \p IRBB, except its terminator which is managed in VPlan.
3634
3635 /// The method which generates the output IR instructions that correspond to
3636 /// this VPBasicBlock, thereby "executing" the VPlan.
3637 void execute(VPTransformState *State) override;
3638
/// Clone this block: create a new VPIRBasicBlock wrapping the same IR basic
/// block and append a clone of each recipe held by this block to it.
3639 VPIRBasicBlock *clone() override {
3640 auto *NewBlock = new VPIRBasicBlock(IRBB);
3641 for (VPRecipeBase &R : Recipes)
3642 NewBlock->appendRecipe(R.clone());
3643 return NewBlock;
3644 }
3645
3646 BasicBlock *getIRBasicBlock() const { return IRBB; }
3647};
3648
3649/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
3650/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
3651/// A VPRegionBlock may indicate that its contents are to be replicated several
3652/// times. This is designed to support predicated scalarization, in which a
3653/// scalar if-then code structure needs to be generated VF * UF times. Having
3654/// this replication indicator helps to keep a single model for multiple
3655/// candidate VF's. The actual replication takes place only once the desired VF
3656/// and UF have been determined.
3658 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
3659 VPBlockBase *Entry;
3660
3661 /// Hold the Single Exiting block of the SESE region modelled by the
3662 /// VPRegionBlock.
3663 VPBlockBase *Exiting;
3664
3665 /// An indicator whether this region is to generate multiple replicated
3666 /// instances of output IR corresponding to its VPBlockBases.
3667 bool IsReplicator;
3668
3669public:
3671 const std::string &Name = "", bool IsReplicator = false)
3672 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
3673 IsReplicator(IsReplicator) {
3674 assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
3675 assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
3676 Entry->setParent(this);
3677 Exiting->setParent(this);
3678 }
3679 VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
3680 : VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
3681 IsReplicator(IsReplicator) {}
3682
/// If an entry block is set, drop all references held by the blocks reachable
/// from it (passing a temporary dummy VPValue as the replacement value) and
/// then delete the CFG rooted at the entry block. Does nothing when no entry
/// block has been set.
3683 ~VPRegionBlock() override {
3684 if (Entry) {
3685 VPValue DummyValue;
3686 Entry->dropAllReferences(&DummyValue);
3687 deleteCFG(Entry);
3688 }
3689 }
3690
3691 /// Method to support type inquiry through isa, cast, and dyn_cast.
3692 static inline bool classof(const VPBlockBase *V) {
3693 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
3694 }
3695
3696 const VPBlockBase *getEntry() const { return Entry; }
3697 VPBlockBase *getEntry() { return Entry; }
3698
3699 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
3700 /// EntryBlock must have no predecessors.
3701 void setEntry(VPBlockBase *EntryBlock) {
3702 assert(EntryBlock->getPredecessors().empty() &&
3703 "Entry block cannot have predecessors.");
3704 Entry = EntryBlock;
3705 EntryBlock->setParent(this);
3706 }
3707
3708 const VPBlockBase *getExiting() const { return Exiting; }
3709 VPBlockBase *getExiting() { return Exiting; }
3710
3711 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
3712 /// ExitingBlock must have no successors.
3713 void setExiting(VPBlockBase *ExitingBlock) {
3714 assert(ExitingBlock->getSuccessors().empty() &&
3715 "Exit block cannot have successors.");
3716 Exiting = ExitingBlock;
3717 ExitingBlock->setParent(this);
3718 }
3719
3720 /// Returns the pre-header VPBasicBlock of the loop region.
3722 assert(!isReplicator() && "should only get pre-header of loop regions");
3724 }
3725
3726 /// An indicator whether this region is to generate multiple replicated
3727 /// instances of output IR corresponding to its VPBlockBases.
3728 bool isReplicator() const { return IsReplicator; }
3729
3730 /// The method which generates the output IR instructions that correspond to
3731 /// this VPRegionBlock, thereby "executing" the VPlan.
3732 void execute(VPTransformState *State) override;
3733
3734 /// Return the cost of this region.
3736
3737 void dropAllReferences(VPValue *NewValue) override;
3738
3739#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3740 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
3741 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
3742 /// consecutive numbers.
3743 ///
3744 /// Note that the numbering is applied to the whole VPlan, so printing
3745 /// individual regions is consistent with the whole VPlan printing.
3746 void print(raw_ostream &O, const Twine &Indent,
3747 VPSlotTracker &SlotTracker) const override;
3748 using VPBlockBase::print; // Get the print(raw_stream &O) version.
3749#endif
3750
3751 /// Clone all blocks in the single-entry single-exit region of the block and
3752 /// their recipes without updating the operands of the cloned recipes.
3753 VPRegionBlock *clone() override;
3754};
3755
3756 /// VPlan models a candidate for vectorization, encoding various decisions taken
3757 /// to produce efficient output IR, including which branches, basic-blocks and
3758 /// output IR instructions to generate, and their cost. VPlan holds a
3759 /// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
3760 /// VPBasicBlock.
3761class VPlan {
3762 friend class VPlanPrinter;
3763 friend class VPSlotTracker;
3764
3765 /// VPBasicBlock corresponding to the original preheader. Used to place
3766 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
3767 /// rest of VPlan execution.
3768 /// When this VPlan is used for the epilogue vector loop, the entry will be
3769 /// replaced by a new entry block created during skeleton creation.
3770 VPBasicBlock *Entry;
3771
3772 /// VPIRBasicBlock wrapping the header of the original scalar loop.
3773 VPIRBasicBlock *ScalarHeader;
3774
3775 /// Holds the VFs applicable to this VPlan.
3777
3778 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
3779 /// any UF.
3781
3782 /// Holds the name of the VPlan, for printing.
3783 std::string Name;
3784
3785 /// Represents the trip count of the original loop, for folding
3786 /// the tail.
3787 VPValue *TripCount = nullptr;
3788
3789 /// Represents the backedge taken count of the original loop, for folding
3790 /// the tail. It equals TripCount - 1.
3791 VPValue *BackedgeTakenCount = nullptr;
3792
3793 /// Represents the vector trip count.
3794 VPValue VectorTripCount;
3795
3796 /// Represents the vectorization factor of the loop.
3797 VPValue VF;
3798
3799 /// Represents the loop-invariant VF * UF of the vector loop region.
3800 VPValue VFxUF;
3801
3802 /// Holds a mapping between Values and their corresponding VPValue inside
3803 /// VPlan.
3804 Value2VPValueTy Value2VPValue;
3805
3806 /// Contains all the external definitions created for this VPlan. External
3807 /// definitions are VPValues that hold a pointer to their underlying IR.
3808 SmallVector<VPValue *, 16> VPLiveInsToFree;
3809
3810 /// Mapping from SCEVs to the VPValues representing their expansions.
3811 /// NOTE: This mapping is temporary and will be removed once all users have
3812 /// been modeled in VPlan directly.
3813 DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
3814
3815public:
3816 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
3817 /// wrapping the original header of the scalar loop.
3818 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
3819 : Entry(Entry), ScalarHeader(ScalarHeader) {
3820 Entry->setPlan(this);
3821 assert(ScalarHeader->getNumSuccessors() == 0 &&
3822 "scalar header must be a leaf node");
3823 }
3824
3825 /// Construct a VPlan with \p Entry entering the plan, trip count \p TC and
3826 /// with \p ScalarHeader wrapping the original header of the scalar loop.
3827 VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
3828 : VPlan(Entry, ScalarHeader) {
3829 TripCount = TC;
3830 }
3831
3832 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
3833 /// original preheader and scalar header of \p L, to be used as entry and
3834 /// scalar header blocks of the new VPlan.
3835 VPlan(Loop *L);
3836
3837 ~VPlan();
3838
3840 Entry = VPBB;
3841 VPBB->setPlan(this);
3842 }
3843
3844 /// Create initial VPlan, having an "entry" VPBasicBlock (wrapping
3845 /// original scalar pre-header) which contains SCEV expansions that need
3846 /// to happen before the CFG is modified (when executing a VPlan for the
3847 /// epilogue vector loop, the original entry needs to be replaced by a new
3848 /// one); a VPBasicBlock for the vector pre-header, followed by a region for
3849 /// the vector loop, followed by the middle VPBasicBlock. If a check is needed
3850 /// to guard executing the scalar epilogue loop, it will be added to the
3851 /// middle block, together with VPBasicBlocks for the scalar preheader and
3852 /// exit blocks. \p InductionTy is the type of the canonical induction and
3853 /// used for related values, like the trip count expression.
3854 static VPlanPtr createInitialVPlan(Type *InductionTy,
3856 bool RequiresScalarEpilogueCheck,
3857 bool TailFolded, Loop *TheLoop);
3858
3859 /// Prepare the plan for execution, setting up the required live-in values.
3860 void prepareToExecute(Value *TripCount, Value *VectorTripCount,
3861 VPTransformState &State);
3862
3863 /// Generate the IR code for this VPlan.
3864 void execute(VPTransformState *State);
3865
3866 /// Return the cost of this plan.
3868
3869 VPBasicBlock *getEntry() { return Entry; }
3870 const VPBasicBlock *getEntry() const { return Entry; }
3871
3872 /// Returns the preheader of the vector loop region.
3874 return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
3875 }
3876
3877 /// Returns the VPRegionBlock of the vector loop.
3879 const VPRegionBlock *getVectorLoopRegion() const;
3880
3881 /// Returns the 'middle' block of the plan, that is the block that selects
3882 /// whether to execute the scalar tail loop or the exit block from the loop
3883 /// latch.
3885 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3886 }
3888 return cast<VPBasicBlock>(getScalarPreheader()->getPredecessors().front());
3889 }
3890
3891 /// Return the VPBasicBlock for the preheader of the scalar loop.
3893 return cast<VPBasicBlock>(getScalarHeader()->getSinglePredecessor());
3894 }
3895
3896 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
3897 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
3898
3899 /// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
3900 /// of the VPlan, that is leaf nodes except the scalar header. Defined in
3901 /// VPlanHCFG, as the definition of the type needs access to the definitions
3902 /// of VPBlockShallowTraversalWrapper.
3903 auto getExitBlocks();
3904
3905 /// The trip count of the original loop.
3907 assert(TripCount && "trip count needs to be set before accessing it");
3908 return TripCount;
3909 }
3910
3911 /// Resets the trip count for the VPlan. The caller must make sure all uses of
3912 /// the original trip count have been replaced.
/// Asserts that a trip count is already set, that \p NewTripCount is non-null
/// and that the current trip count has no remaining users.
3913 void resetTripCount(VPValue *NewTripCount) {
3914 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
3915 "TripCount always must be set");
3916 TripCount = NewTripCount;
3917 }
3918
3919 /// The backedge taken count of the original loop.
3921 if (!BackedgeTakenCount)
3922 BackedgeTakenCount = new VPValue();
3923 return BackedgeTakenCount;
3924 }
3925
3926 /// The vector trip count.
3927 VPValue &getVectorTripCount() { return VectorTripCount; }
3928
3929 /// Returns the VF of the vector loop region.
3930 VPValue &getVF() { return VF; }
3931
3932 /// Returns VF * UF of the vector loop region.
3933 VPValue &getVFxUF() { return VFxUF; }
3934
/// Add \p VF to the set of VFs of the plan (inserted into VFs).
3935 void addVF(ElementCount VF) { VFs.insert(VF); }
3936
3938 assert(hasVF(VF) && "Cannot set VF not already in plan");
3939 VFs.clear();
3940 VFs.insert(VF);
3941 }
3942
/// Returns true if \p VF is one of the VFs of the plan. Const-qualified, like
/// hasUF() and hasScalarVFOnly(), so it can be queried on a const VPlan.
3943 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
3945 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
3946 }
3947
3948 /// Returns an iterator range over all VFs of the plan.
3951 return {VFs.begin(), VFs.end()};
3952 }
3953
/// Returns true if the plan has exactly one VF and that VF is scalar.
3954 bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); }
3955
/// Returns true if \p UF is among the UFs of the plan. An empty UFs set is
/// treated as matching every \p UF.
3956 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
3957
/// Returns the single UF of the plan. Asserts that exactly one UF is
/// recorded in UFs.
3958 unsigned getUF() const {
3959 assert(UFs.size() == 1 && "Expected a single UF");
3960 return UFs[0];
3961 }
3962
/// Make \p UF the only UF of the plan. \p UF must satisfy hasUF() (asserted);
/// all previously recorded UFs are dropped.
3963 void setUF(unsigned UF) {
3964 assert(hasUF(UF) && "Cannot set the UF not already in plan");
3965 UFs.clear();
3966 UFs.insert(UF);
3967 }
3968
3969 /// Return a string with the name of the plan and the applicable VFs and UFs.
3970 std::string getName() const;
3971
/// Set the name of the plan to the string form of \p newName.
3972 void setName(const Twine &newName) { Name = newName.str(); }
3973
3974 /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists
3975 /// yet) for \p V.
3977 assert(V && "Trying to get or add the VPValue of a null Value");
3978 if (!Value2VPValue.count(V)) {
3979 VPValue *VPV = new VPValue(V);
3980 VPLiveInsToFree.push_back(VPV);
3981 assert(VPV->isLiveIn() && "VPV must be a live-in.");
3982 assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
3983 Value2VPValue[V] = VPV;
3984 }
3985
3986 assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
3987 assert(Value2VPValue[V]->isLiveIn() &&
3988 "Only live-ins should be in mapping");
3989 return Value2VPValue[V];
3990 }
3991
3992 /// Return the live-in VPValue for \p V, if there is one or nullptr otherwise.
3993 VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3994
3995#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3996 /// Print the live-ins of this VPlan to \p O.
3997 void printLiveIns(raw_ostream &O) const;
3998
3999 /// Print this VPlan to \p O.
4000 void print(raw_ostream &O) const;
4001
4002 /// Print this VPlan in DOT format to \p O.
4003 void printDOT(raw_ostream &O) const;
4004
4005 /// Dump the plan to stderr (for debugging).
4006 LLVM_DUMP_METHOD void dump() const;
4007#endif
4008
4009 /// Returns the canonical induction recipe of the vector loop.
4012 if (EntryVPBB->empty()) {
4013 // VPlan native path.
4014 EntryVPBB = cast<VPBasicBlock>(EntryVPBB->getSingleSuccessor());
4015 }
4016 return cast<VPCanonicalIVPHIRecipe>(&*EntryVPBB->begin());
4017 }
4018
/// Return the VPValue recorded in SCEVToExpansion for \p S. If nothing was
/// recorded, lookup() yields a default-constructed value (presumably nullptr
/// here, given the pointer return type).
4019 VPValue *getSCEVExpansion(const SCEV *S) const {
4020 return SCEVToExpansion.lookup(S);
4021 }
4022
/// Record \p V as the expansion of \p S in SCEVToExpansion. \p S must not
/// already have an expansion recorded (asserted).
4023 void addSCEVExpansion(const SCEV *S, VPValue *V) {
4024 assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
4025 SCEVToExpansion[S] = V;
4026 }
4027
4028 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
4029 /// recipes to refer to the clones, and return it.
4030 VPlan *duplicate();
4031};
4032
4033#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4034/// VPlanPrinter prints a given VPlan to a given output stream. The printing is
4035/// indented and follows the dot format.
4037 raw_ostream &OS;
4038 const VPlan &Plan;
4039 unsigned Depth = 0;
4040 unsigned TabWidth = 2;
4041 std::string Indent;
4042 unsigned BID = 0;
4044
4046
4047 /// Handle indentation: add \p b to Depth and rebuild Indent as a string of
/// Depth * TabWidth spaces.
4048 void bumpIndent(int b) { Indent = std::string((Depth += b) * TabWidth, ' '); }
4049
4050 /// Print a given \p Block of the Plan.
4051 void dumpBlock(const VPBlockBase *Block);
4052
4053 /// Print the information related to the CFG edges going out of a given
4054 /// \p Block, followed by printing the successor blocks themselves.
4055 void dumpEdges(const VPBlockBase *Block);
4056
4057 /// Print a given \p BasicBlock, including its VPRecipes, followed by printing
4058 /// its successor blocks.
4059 void dumpBasicBlock(const VPBasicBlock *BasicBlock);
4060
4061 /// Print a given \p Region of the Plan.
4062 void dumpRegion(const VPRegionBlock *Region);
4063
/// Return the ID associated with \p Block in BlockID. If \p Block has no ID
/// yet, assign it the current value of BID and then increment BID.
4064 unsigned getOrCreateBID(const VPBlockBase *Block) {
4065 return BlockID.count(Block) ? BlockID[Block] : BlockID[Block] = BID++;
4066 }
4067
4068 Twine getOrCreateName(const VPBlockBase *Block);
4069
4070 Twine getUID(const VPBlockBase *Block);
4071
4072 /// Print the information related to a CFG edge between two VPBlockBases.
4073 void drawEdge(const VPBlockBase *From, const VPBlockBase *To, bool Hidden,
4074 const Twine &Label);
4075
4076public:
4078 : OS(O), Plan(P), SlotTracker(&P) {}
4079
4080 LLVM_DUMP_METHOD void dump();
4081};
4082
4084 const Value *V;
4085
4086 VPlanIngredient(const Value *V) : V(V) {}
4087
4088 void print(raw_ostream &O) const;
4089};
4090
4092 I.print(OS);
4093 return OS;
4094}
4095
4097 Plan.print(OS);
4098 return OS;
4099}
4100#endif
4101
4102//===----------------------------------------------------------------------===//
4103// VPlan Utilities
4104//===----------------------------------------------------------------------===//
4105
4106/// Class that provides utilities for VPBlockBases in VPlan.
4108public:
4109 VPBlockUtils() = delete;
4110
4111 /// Insert disconnected VPBlockBase \p NewBlock after \p BlockPtr. Add \p
4112 /// NewBlock as successor of \p BlockPtr and \p BlockPtr as predecessor of \p
4113 /// NewBlock, and propagate \p BlockPtr parent to \p NewBlock. \p BlockPtr's
4114 /// successors are moved from \p BlockPtr to \p NewBlock. \p NewBlock must
4115 /// have neither successors nor predecessors.
4116 static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4117 assert(NewBlock->getSuccessors().empty() &&
4118 NewBlock->getPredecessors().empty() &&
4119 "Can't insert new block with predecessors or successors.");
4120 NewBlock->setParent(BlockPtr->getParent());
// Copy the successors into a local vector first: disconnectBlocks() below
// removes entries from BlockPtr's successor list while this loop runs.
4121 SmallVector<VPBlockBase *> Succs(BlockPtr->successors());
4122 for (VPBlockBase *Succ : Succs) {
4123 disconnectBlocks(BlockPtr, Succ);
4124 connectBlocks(NewBlock, Succ);
4125 }
// BlockPtr has no successors left at this point; NewBlock becomes its only
// successor.
4126 connectBlocks(BlockPtr, NewBlock);
4127 }
4128
4129 /// Insert disconnected block \p NewBlock before \p BlockPtr. First
4130 /// disconnects all predecessors of \p BlockPtr and connects them to \p
4131 /// NewBlock. Add \p NewBlock as predecessor of \p BlockPtr and \p BlockPtr as
4132 /// successor of \p NewBlock.
4133 static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr) {
4134 assert(NewBlock->getSuccessors().empty() &&
4135 NewBlock->getPredecessors().empty() &&
4136 "Can't insert new block with predecessors or successors.");
4137 NewBlock->setParent(BlockPtr->getParent());
// Iterate over a copy (to_vector) of the predecessors: disconnectBlocks()
// removes Pred from BlockPtr's predecessor list inside the loop.
4138 for (VPBlockBase *Pred : to_vector(BlockPtr->predecessors())) {
4139 disconnectBlocks(Pred, BlockPtr);
4140 connectBlocks(Pred, NewBlock);
4141 }
// All former predecessors now lead to NewBlock; link NewBlock to BlockPtr.
4142 connectBlocks(NewBlock, BlockPtr);
4143 }
4144
4145 /// Insert disconnected VPBlockBases \p IfTrue and \p IfFalse after \p
4146 /// BlockPtr. Add \p IfTrue and \p IfFalse as successors of \p BlockPtr and \p
4147 /// BlockPtr as predecessor of \p IfTrue and \p IfFalse. Propagate \p BlockPtr
4148 /// parent to \p IfTrue and \p IfFalse. \p BlockPtr must have no successors
4149 /// and \p IfTrue and \p IfFalse must have neither successors nor
4150 /// predecessors.
4151 static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse,
4152 VPBlockBase *BlockPtr) {
// NOTE(review): only the "no successors" half of the documented
// precondition on IfTrue/IfFalse is asserted in this function. The
// "no predecessors" requirement and the requirement that BlockPtr has no
// successors are not checked here; they may be enforced inside
// setPredecessors()/setTwoSuccessors() - confirm against their definitions.
4153 assert(IfTrue->getSuccessors().empty() &&
4154 "Can't insert IfTrue with successors.");
4155 assert(IfFalse->getSuccessors().empty() &&
4156 "Can't insert IfFalse with successors.");
4157 BlockPtr->setTwoSuccessors(IfTrue, IfFalse);
4158 IfTrue->setPredecessors({BlockPtr});
4159 IfFalse->setPredecessors({BlockPtr});
// Both new blocks are given the same parent as BlockPtr.
4160 IfTrue->setParent(BlockPtr->getParent());
4161 IfFalse->setParent(BlockPtr->getParent());
4162 }
4163
4164 /// Connect VPBlockBases \p From and \p To bi-directionally. If \p PredIdx is
4165 /// -1, append \p From to the predecessors of \p To, otherwise set \p To's
4166 /// predecessor at \p PredIdx to \p From. If \p SuccIdx is -1, append \p To to
4167 /// the successors of \p From, otherwise set \p From's successor at \p SuccIdx
4168 /// to \p To. Both VPBlockBases must have the same parent, which can be null.
4169 /// Both VPBlockBases can be already connected to other VPBlockBases.
4171 unsigned PredIdx = -1u, unsigned SuccIdx = -1u) {
4172 assert((From->getParent() == To->getParent()) &&
4173 "Can't connect two block with different parents");
4174 assert((SuccIdx != -1u || From->getNumSuccessors() < 2) &&
4175 "Blocks can't have more than two successors.");
4176 if (SuccIdx == -1u)
4177 From->appendSuccessor(To);
4178 else
4179 From->getSuccessors()[SuccIdx] = To;
4180
4181 if (PredIdx == -1u)
4182 To->appendPredecessor(From);
4183 else
4184 To->getPredecessors()[PredIdx] = From;
4185 }
4186
4187 /// Disconnect VPBlockBases \p From and \p To bi-directionally. Remove \p To
4188 /// from the successors of \p From and \p From from the predecessors of \p To.
4190 assert(To && "Successor to disconnect is null.");
4191 From->removeSuccessor(To);
4192 To->removePredecessor(From);
4193 }
4194
4195 /// Reassociate all the blocks connected to \p Old so that they now point to
4196 /// \p New.
4198 for (auto *Pred : to_vector(Old->getPredecessors()))
4199 Pred->replaceSuccessor(Old, New);
4200 for (auto *Succ : to_vector(Old->getSuccessors()))
4201 Succ->replacePredecessor(Old, New);
4202 New->setPredecessors(Old->getPredecessors());
4203 New->setSuccessors(Old->getSuccessors());
4204 Old->clearPredecessors();
4205 Old->clearSuccessors();
4206 }
4207
4208 /// Return an iterator range over \p Range which only includes \p BlockTy
4209 /// blocks. The accesses are cast to \p BlockTy.
4210 template <typename BlockTy, typename T>
4211 static auto blocksOnly(const T &Range) {
4212 // Create BaseTy with correct const-ness based on BlockTy.
4213 using BaseTy = std::conditional_t<std::is_const<BlockTy>::value,
4214 const VPBlockBase, VPBlockBase>;
4215
4216 // We need to first create an iterator range over (const) BlockTy & instead
4217 // of (const) BlockTy * for filter_range to work properly.
4218 auto Mapped =
4219 map_range(Range, [](BaseTy *Block) -> BaseTy & { return *Block; });
4221 Mapped, [](BaseTy &Block) { return isa<BlockTy>(&Block); });
4222 return map_range(Filter, [](BaseTy &Block) -> BlockTy * {
4223 return cast<BlockTy>(&Block);
4224 });
4225 }
4226
4227 /// Inserts \p BlockPtr on the edge between \p From and \p To. That is, update
4228 /// \p From's successor to \p To to point to \p BlockPtr and \p To's
4229 /// predecessor from \p From to \p BlockPtr. \p From and \p To are added to \p
4230 /// BlockPtr's predecessors and successors respectively. There must be a
4231 /// single edge between \p From and \p To.
4233 VPBlockBase *BlockPtr) {
4234 auto &Successors = From->getSuccessors();
4235 auto &Predecessors = To->getPredecessors();
4236 assert(count(Successors, To) == 1 && count(Predecessors, From) == 1 &&
4237 "must have single between From and To");
4238 unsigned SuccIdx = std::distance(Successors.begin(), find(Successors, To));
4239 unsigned PredIx =
4240 std::distance(Predecessors.begin(), find(Predecessors, From));
4241 VPBlockUtils::connectBlocks(From, BlockPtr, -1, SuccIdx);
4242 VPBlockUtils::connectBlocks(BlockPtr, To, PredIx, -1);
4243 }
4244};
4245
4248 InterleaveGroupMap;
4249
4250 /// Type for mapping of instruction based interleave groups to VPInstruction
4251 /// interleave groups
4254
4255 /// Recursively traverse \p Region and populate VPlan based interleave groups
4256 /// based on \p IAI.
4257 void visitRegion(VPRegionBlock *Region, Old2NewTy &Old2New,
4259 /// Recursively traverse \p Block and populate VPlan based interleave groups
4260 /// based on \p IAI.
4261 void visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
4263
4264public:
4266
4269 // Avoid releasing a pointer twice.
4270 for (auto &I : InterleaveGroupMap)
4271 DelSet.insert(I.second);
4272 for (auto *Ptr : DelSet)
4273 delete Ptr;
4274 }
4275
4276 /// Get the interleave group that \p Instr belongs to.
4277 ///
4278 /// \returns nullptr if doesn't have such group.
4281 return InterleaveGroupMap.lookup(Instr);
4282 }
4283};
4284
4285/// Class that maps (parts of) an existing VPlan to trees of combined
4286/// VPInstructions.
4288 enum class OpMode { Failed, Load, Opcode };
4289
4290 /// A DenseMapInfo implementation for using SmallVector<VPValue *, 4> as
4291 /// DenseMap keys.
4292 struct BundleDenseMapInfo {
4293 static SmallVector<VPValue *, 4> getEmptyKey() {
4294 return {reinterpret_cast<VPValue *>(-1)};
4295 }
4296
4297 static SmallVector<VPValue *, 4> getTombstoneKey() {
4298 return {reinterpret_cast<VPValue *>(-2)};
4299 }
4300
4301 static unsigned getHashValue(const SmallVector<VPValue *, 4> &V) {
4302 return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
4303 }
4304
4305 static bool isEqual(const SmallVector<VPValue *, 4> &LHS,
4307 return LHS == RHS;
4308 }
4309 };
4310
4311 /// Mapping of values in the original VPlan to a combined VPInstruction.
4313 BundleToCombined;
4314
4316
4317 /// Basic block to operate on. For now, only instructions in a single BB are
4318 /// considered.
4319 const VPBasicBlock &BB;
4320
4321 /// Indicates whether we managed to combine all visited instructions or not.
4322 bool CompletelySLP = true;
4323
4324 /// Width of the widest combined bundle in bits.
4325 unsigned WidestBundleBits = 0;
4326
4327 using MultiNodeOpTy =
4328 typename std::pair<VPInstruction *, SmallVector<VPValue *, 4>>;
4329
4330 // Input operand bundles for the current multi node. Each multi node operand
4331 // bundle contains values not matching the multi node's opcode. They will
4332 // be reordered in reorderMultiNodeOps, once we completed building a
4333 // multi node.
4334 SmallVector<MultiNodeOpTy, 4> MultiNodeOps;
4335
4336 /// Indicates whether we are building a multi node currently.
4337 bool MultiNodeActive = false;
4338
4339 /// Check if we can vectorize Operands together.
4340 bool areVectorizable(ArrayRef<VPValue *> Operands) const;
4341
4342 /// Add combined instruction \p New for the bundle \p Operands.
4343 void addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New);
4344
4345 /// Indicate we hit a bundle we failed to combine. Returns nullptr for now.
4346 VPInstruction *markFailed();
4347
4348 /// Reorder operands in the multi node to maximize sequential memory access
4349 /// and commutative operations.
4350 SmallVector<MultiNodeOpTy, 4> reorderMultiNodeOps();
4351
4352 /// Choose the best candidate to use for the lane after \p Last. The set of
4353 /// candidates to choose from are values with an opcode matching \p Last's
4354 /// or loads consecutive to \p Last.
4355 std::pair<OpMode, VPValue *> getBest(OpMode Mode, VPValue *Last,
4356 SmallPtrSetImpl<VPValue *> &Candidates,
4358
4359#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4360 /// Print bundle \p Values to dbgs().
4361 void dumpBundle(ArrayRef<VPValue *> Values);
4362#endif
4363
4364public:
/// Construct a VPlanSlp that operates on basic block \p BB; \p IAI and \p BB
/// are stored in the members of the same name.
4365 VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB) : IAI(IAI), BB(BB) {}
4366
4367 ~VPlanSlp() = default;
4368
4369 /// Tries to build an SLP tree rooted at \p Operands and returns a
4370 /// VPInstruction combining \p Operands, if they can be combined.
4372
4373 /// Return the width of the widest combined bundle in bits.
4374 unsigned getWidestBundleBits() const { return WidestBundleBits; }
4375
4376 /// Return true if all visited instructions can be combined.
4377 bool isCompletelySLP() const { return CompletelySLP; }
4378};
4379} // end namespace llvm
4380
4381#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
always inline
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
RelocType Type
Definition: COFFYAML.cpp:410
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
uint64_t Addr
std::string Name
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1313
Flatten the CFG
Hexagon Common GEP
std::pair< BasicBlock *, unsigned > BlockTy
A pair of (basic block, score).
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This pass exposes codegen information to IR-level passes.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPDefID)
Definition: VPlan.h:825
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition: blake3_impl.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:608
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:147
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:322
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:205
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:480
uint32_t getFactor() const
Definition: VectorUtils.h:496
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:550
InstTy * getInsertPos() const
Definition: VectorUtils.h:566
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:622
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
bool onlyWritesMemory() const
Whether this function only (at most) writes memory.
Definition: ModRef.h:198
bool onlyReadsMemory() const
Whether this function only (at most) reads memory.
Definition: ModRef.h:195
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:77
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
iterator end()
Get an iterator to the end of the SetVector.
Definition: SetVector.h:113
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
iterator begin()
Get an iterator to the beginning of the SetVector.
Definition: SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This class provides computation of slot numbers for LLVM Assembly writing.
Definition: AsmWriter.cpp:698
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
Iterator to iterate over vectorization factors in a VFRange.
Definition: VPlan.h:121
ElementCount operator*() const
Definition: VPlan.h:129
iterator & operator++()
Definition: VPlan.h:131
iterator(ElementCount VF)
Definition: VPlan.h:125
bool operator==(const iterator &Other) const
Definition: VPlan.h:127
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
Definition: VPlan.h:3228
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3236
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3245
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition: VPlan.h:3230
~VPActiveLaneMaskPHIRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:3470
RecipeListTy::const_iterator const_iterator
Definition: VPlan.h:3495
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition: VPlan.h:3542
VPBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3590
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition: VPlan.h:3497
RecipeListTy::iterator iterator
Instruction iterators...
Definition: VPlan.h:3494
void connectToPredecessors(VPTransformState::CFGState &CFG)
Connect the VPBBs predecessors' in the VPlan CFG to the IR basic block generated for this VPBB.
Definition: VPlan.cpp:419
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:477
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition: VPlan.h:3520
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition: VPlan.h:3478
iterator end()
Definition: VPlan.h:3504
VPBasicBlock(const Twine &Name="", VPRecipeBase *Recipe=nullptr)
Definition: VPlan.h:3482
iterator begin()
Recipe iterator methods.
Definition: VPlan.h:3502
RecipeListTy::reverse_iterator reverse_iterator
Definition: VPlan.h:3496
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition: VPlan.h:3555
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of this VPBasicBlock.
Definition: VPlan.cpp:765
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition: VPlan.cpp:213
~VPBasicBlock() override
Definition: VPlan.h:3488
VPRegionBlock * getEnclosingLoopRegion()
Definition: VPlan.cpp:566
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:516
const_reverse_iterator rbegin() const
Definition: VPlan.h:3508
reverse_iterator rend()
Definition: VPlan.h:3509
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition: VPlan.cpp:538
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition: VPlan.h:3476
void executeRecipes(VPTransformState *State, BasicBlock *BB)
Execute the recipes in the IR basic block BB.
Definition: VPlan.cpp:526
VPRecipeBase & back()
Definition: VPlan.h:3517
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPBasicBlock to O, prefixing all lines with Indent.
Definition: VPlan.cpp:631
const VPRecipeBase & front() const
Definition: VPlan.h:3514
const_iterator begin() const
Definition: VPlan.h:3503
VPRecipeBase & front()
Definition: VPlan.h:3515
bool isExiting() const
Returns true if the block is exiting its parent region.
Definition: VPlan.cpp:614
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition: VPlan.cpp:602
const VPRecipeBase & back() const
Definition: VPlan.h:3516
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition: VPlan.h:3533
bool empty() const
Definition: VPlan.h:3513
const_iterator end() const
Definition: VPlan.h:3505
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3528
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition: VPlan.h:3523
reverse_iterator rbegin()
Definition: VPlan.h:3507
size_t size() const
Definition: VPlan.h:3512
const_reverse_iterator rend() const
Definition: VPlan.h:3510
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
Definition: VPlan.h:2425
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands)
The blend operation is a User of the incoming values and of their respective masks,...
Definition: VPlan.h:2431
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBlendRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2478
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition: VPlan.h:2454
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition: VPlan.h:2459
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition: VPlan.h:2449
VPBlendRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2436
void execute(VPTransformState &State) override
Generate the phi/select nodes.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition: VPlan.h:2445
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition: VPlan.h:396
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition: VPlan.h:612
VPRegionBlock * getParent()
Definition: VPlan.h:488
VPBlocksTy & getPredecessors()
Definition: VPlan.h:520
iterator_range< VPBlockBase ** > predecessors()
Definition: VPlan.h:517
const VPBasicBlock * getExitingBasicBlock() const
Definition: VPlan.cpp:178
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition: VPlan.h:678
void setName(const Twine &newName)
Definition: VPlan.h:481
size_t getNumSuccessors() const
Definition: VPlan.h:534
iterator_range< VPBlockBase ** > successors()
Definition: VPlan.h:516
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition: VPlan.h:627
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition: VPlan.cpp:619
bool isLegalToHoistInto()
Return true if it is legal to hoist instructions into this block.
Definition: VPlan.h:643
virtual ~VPBlockBase()=default
void print(raw_ostream &O) const
Print plain-text dump of this VPBlockBase to O.
Definition: VPlan.h:668
const VPBlocksTy & getHierarchicalPredecessors()
Definition: VPlan.h:570
size_t getNumPredecessors() const
Definition: VPlan.h:535
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition: VPlan.h:603
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition: VPlan.cpp:200
const VPBlocksTy & getPredecessors() const
Definition: VPlan.h:519
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
static void deleteCFG(VPBlockBase *Entry)
Delete all blocks reachable from a given VPBlockBase, inclusive.
Definition: VPlan.cpp:208
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclasses of VPBlockBase that are actually instantiat...
Definition: VPlan.h:473
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
VPlan * getPlan()
Definition: VPlan.cpp:153
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition: VPlan.cpp:172
const VPRegionBlock * getParent() const
Definition: VPlan.h:489
const std::string & getName() const
Definition: VPlan.h:479
void clearSuccessors()
Remove all the successors of this block.
Definition: VPlan.h:622
VPBlockBase * getSingleHierarchicalSuccessor()
Definition: VPlan.h:560
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition: VPlan.h:594
VPBlockBase * getSinglePredecessor() const
Definition: VPlan.h:530
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition: VPlan.h:554
void clearPredecessors()
Remove all the predecessors of this block.
Definition: VPlan.h:619
unsigned getVPBlockID() const
Definition: VPlan.h:486
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition: VPlan.h:654
VPBlockBase(const unsigned char SC, const std::string &N)
Definition: VPlan.h:465
VPBlocksTy & getSuccessors()
Definition: VPlan.h:514
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition: VPlan.cpp:192
const VPBasicBlock * getEntryBasicBlock() const
Definition: VPlan.cpp:158
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition: VPlan.h:583
void setParent(VPRegionBlock *P)
Definition: VPlan.h:499
virtual void dropAllReferences(VPValue *NewValue)=0
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
VPBlockBase * getSingleHierarchicalPredecessor()
Definition: VPlan.h:576
VPBlockBase * getSingleSuccessor() const
Definition: VPlan.h:524
const VPBlocksTy & getSuccessors() const
Definition: VPlan.h:513
Class that provides utilities for VPBlockBases in VPlan.
Definition: VPlan.h:4107
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
Definition: VPlan.h:4211
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition: VPlan.h:4116
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition: VPlan.h:4232
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
Definition: VPlan.h:4151
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4170
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition: VPlan.h:4189
static void reassociateBlocks(VPBlockBase *Old, VPBlockBase *New)
Reassociate all the blocks connected to Old so that they now point to New.
Definition: VPlan.h:4197
static void insertBlockBefore(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected block NewBlock before BlockPtr.
Definition: VPlan.h:4133
A recipe for generating conditional branches on the bits of a mask.
Definition: VPlan.h:2783
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2819
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition: VPlan.h:2807
VPBranchOnMaskRecipe(VPValue *BlockInMask)
Definition: VPlan.h:2785
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2791
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2826
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Canonical scalar induction phi of the vector loop.
Definition: VPlan.h:3162
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:3205
~VPCanonicalIVPHIRecipe() override=default
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3177
VPCanonicalIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3169
VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
Definition: VPlan.h:3164
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3198
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:3193
bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step) const
Check if the induction described by Kind, Start and Step is canonical, i.e.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3181
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCanonicalIVPHIRecipe.
Definition: VPlan.h:3217
This class augments a recipe with a set of VPValues defined by the recipe.
Definition: VPlanValue.h:292
unsigned getVPDefID() const
Definition: VPlanValue.h:419
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition: VPlan.h:3344
void execute(VPTransformState &State) override
Generate the transformed value of the induction at offset StartValue (1.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition: VPlan.h:3383
VPValue * getStepValue() const
Definition: VPlan.h:3400
Type * getScalarType() const
Definition: VPlan.h:3395
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3371
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3363
~VPDerivedIVRecipe() override=default
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3403
VPValue * getStartValue() const
Definition: VPlan.h:3399
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start, VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step, const Twine &Name="")
Definition: VPlan.h:3355
A recipe for generating the phi node for the current index of elements, adjusted in accordance with E...
Definition: VPlan.h:3263
static bool classof(const VPHeaderPHIRecipe *D)
Definition: VPlan.h:3276
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPEVLBasedIVPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3270
~VPEVLBasedIVPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition: VPlan.h:3280
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPEVLBasedIVPHIRecipe.
Definition: VPlan.h:3286
VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition: VPlan.h:3265
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3293
Recipe to expand a SCEV expression.
Definition: VPlan.h:3123
VPExpandSCEVRecipe(const SCEV *Expr, ScalarEvolution &SE)
Definition: VPlan.h:3128
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition: VPlan.h:3143
const SCEV * getSCEV() const
Definition: VPlan.h:3155
void execute(VPTransformState &State) override
Expand the SCEV expression of this recipe.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3133
~VPExpandSCEVRecipe() override=default
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition: VPlan.h:2023
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
static bool classof(const VPValue *V)
Definition: VPlan.h:2040
VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr, VPValue *Start=nullptr, DebugLoc DL={})
Definition: VPlan.h:2025
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2071
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition: VPlan.h:2060
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition: VPlan.h:2068
VPValue * getStartValue() const
Definition: VPlan.h:2063
static bool classof(const VPRecipeBase *B)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:2036
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition: VPlan.h:2077
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition: VPlan.h:1772
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VP_CLASSOF_IMPL(VPDef::VPHistogramSC)
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1784
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
VPHistogramRecipe(unsigned Opcode, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:1778
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition: VPlan.h:1801
unsigned getOpcode() const
Definition: VPlan.h:1797
~VPHistogramRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition: VPlan.h:3616
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition: VPlan.cpp:455
VPIRBasicBlock(BasicBlock *IRBB)
Definition: VPlan.h:3620
static VPIRBasicBlock * fromBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition: VPlan.cpp:843
BasicBlock * getIRBasicBlock() const
Definition: VPlan.h:3646
~VPIRBasicBlock() override
Definition: VPlan.h:3625
static bool classof(const VPBlockBase *V)
Definition: VPlan.h:3627
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition: VPlan.h:3639
A recipe to wrap an original IR instruction not to be modified during execution, except for PHIs.
Definition: VPlan.h:1382
Instruction & getInstruction() const
Definition: VPlan.h:1406
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition: VPlan.h:1420
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1393
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition: VPlan.h:1414
VPIRInstruction(Instruction &I)
Definition: VPlan.h:1386
This is a concrete Recipe that models a single VPlan-level instruction.
Definition: VPlan.h:1197
VPInstruction(VPValue *Ptr, VPValue *Offset, GEPNoWrapFlags Flags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1297
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, DebugLoc DL, const Twine &Name="")
Definition: VPlan.h:1272
VPInstruction * clone() override
Clone the current recipe.
Definition: VPlan.h:1308
@ ResumePhi
Creates a scalar phi in a leaf VPBB with a single predecessor in VPlan.
Definition: VPlan.h:1215
@ FirstOrderRecurrenceSplice
Definition: VPlan.h:1203
@ CanonicalIVIncrementForPart
Definition: VPlan.h:1218
@ CalculateTripCountMinusVF
Definition: VPlan.h:1216
bool hasResult() const
Definition: VPlan.h:1338
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition: VPlan.h:1375
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition: VPlan.h:1323
unsigned getOpcode() const
Definition: VPlan.h:1315
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, WrapFlagsTy WrapFlags, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1284
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1277
VPInstruction(unsigned Opcode, std::initializer_list< VPValue * > Operands, DisjointFlagsTy DisjointFlag, DebugLoc DL={}, const Twine &Name="")
Definition: VPlan.h:1289
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector, e.g.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
void execute(VPTransformState &State) override
Generate the instruction.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
Definition: VPlan.h:2492
bool onlyFirstLaneUsed(const VPValue *Op) const override
The recipe only uses the first lane of the address.
Definition: VPlan.h:2575
~VPInterleaveRecipe() override=default
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2533
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps)
Definition: VPlan.h:2504
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2539
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2525
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition: VPlan.h:2546
Instruction * getInsertPos() const
Definition: VPlan.h:2581
const InterleaveGroup< Instruction > * getInterleaveGroup()
Definition: VPlan.h:2566
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInterleaveRecipe.
unsigned getNumStoreOperands() const
Returns the number of stored operands of this interleave group.
Definition: VPlan.h:2570
InterleaveGroup< VPInstruction > * getInterleaveGroup(VPInstruction *Instr) const
Get the interleave group that Instr belongs to.
Definition: VPlan.h:4280
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition: VPlan.h:153
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition: VPlan.h:194
static unsigned getNumCachedLanes(const ElementCount &VF)
Returns the maximum number of lanes that we are able to consider caching for VF.
Definition: VPlan.h:229
Value * getAsRuntimeExpr(IRBuilderBase &Builder, const ElementCount &VF) const
Returns an expression describing the lane index that can be used at runtime.
Definition: VPlan.cpp:73
VPLane(unsigned Lane, Kind LaneKind)
Definition: VPlan.h:176
Kind getKind() const
Returns the Kind of lane offset.
Definition: VPlan.h:210
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition: VPlan.h:180
bool isFirstLane() const
Returns true if this is the first lane of the whole vector.
Definition: VPlan.h:213
VPLane(unsigned Lane)
Definition: VPlan.h:175
unsigned getKnownLane() const
Returns a compile-time known value for the lane index and asserts if the lane can only be calculated ...
Definition: VPlan.h:200
static VPLane getFirstLane()
Definition: VPlan.h:178
Kind
Kind describes how to interpret Lane.
Definition: VPlan.h:156
@ ScalableLast
For ScalableLast, Lane is the offset from the start of the last N-element subvector in a scalable vec...
@ First
For First, Lane is the index into the first N elements of a fixed-vector <N x <ElTy>> or a scalable v...
unsigned mapToCacheIndex(const ElementCount &VF) const
Maps the lane to a cache index based on VF.
Definition: VPlan.h:216
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
Definition: VPlan.h:2838
~VPPredInstPHIRecipe() override=default
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2870
VPPredInstPHIRecipe(VPValue *PredV)
Construct a VPPredInstPHIRecipe given PredInst whose value needs phi nodes after merging back from ...
Definition: VPlan.h:2842
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2846
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition: VPlan.h:2857
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition: VPlan.h:720
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition: VPlan.h:809
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
virtual ~VPRecipeBase()=default
VPBasicBlock * getParent()
Definition: VPlan.h:745
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition: VPlan.h:814
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:787
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:731
virtual VPRecipeBase * clone()=0
Clone the current recipe.
const VPBasicBlock * getParent() const
Definition: VPlan.h:746
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition: VPlan.h:792
VPRecipeBase(const unsigned char SC, iterator_range< IterT > Operands, DebugLoc DL={})
Definition: VPlan.h:736
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
bool isPhi() const
Returns true for PHI-like recipes.
Definition: VPlan.h:798
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Class to record LLVM IR flag for a recipe along with it.
Definition: VPlan.h:930
ExactFlagsTy ExactFlags
Definition: VPlan.h:980
FastMathFlagsTy FMFs
Definition: VPlan.h:983
NonNegFlagsTy NonNegFlags
Definition: VPlan.h:982
CmpInst::Predicate CmpPredicate
Definition: VPlan.h:977
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition: VPlan.h:1150
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, CmpInst::Predicate Pred, DebugLoc DL={})
Definition: VPlan.h:1032
void setFlags(Instruction *I) const
Set the IR flags for I.
Definition: VPlan.h:1111
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, GEPNoWrapFlags GEPFlags, DebugLoc DL={})
Definition: VPlan.h:1057
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1063
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, FastMathFlags FMFs, DebugLoc DL={})
Definition: VPlan.h:1044
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition: VPlan.h:1080
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition: VPlan.h:1153
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, Instruction &I)
Definition: VPlan.h:1002
DisjointFlagsTy DisjointFlags
Definition: VPlan.h:979
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, WrapFlagsTy WrapFlags, DebugLoc DL={})
Definition: VPlan.h:1038
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DisjointFlagsTy DisjointFlags, DebugLoc DL={})
Definition: VPlan.h:1050
GEPNoWrapFlags GEPFlags
Definition: VPlan.h:981
void transferFlags(VPRecipeWithIRFlags &Other)
Definition: VPlan.h:988
WrapFlagsTy WrapFlags
Definition: VPlan.h:978
bool hasNoUnsignedWrap() const
Definition: VPlan.h:1157
bool isDisjoint() const
Definition: VPlan.h:1169
void printFlags(raw_ostream &O) const
CmpInst::Predicate getPredicate() const
Definition: VPlan.h:1144
bool hasNoSignedWrap() const
Definition: VPlan.h:1163
static bool classof(const VPUser *U)
Definition: VPlan.h:1074
FastMathFlags getFastMathFlags() const
VPRecipeWithIRFlags(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:995
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
Definition: VPlan.h:2665
void execute(VPTransformState &State) override
Generate the reduction in the loop.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2695
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition: VPlan.h:2692
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp)
Definition: VPlan.h:2667
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2676
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPReductionEVLRecipe() override=default
A recipe for handling reduction phis.
Definition: VPlan.h:2366
VPReductionPHIRecipe(PHINode *Phi, const RecurrenceDescriptor &RdxDesc, VPValue &Start, bool IsInLoop=false, bool IsOrdered=false)
Create a new VPReductionPHIRecipe for the reduction Phi described by RdxDesc.
Definition: VPlan.h:2379
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition: VPlan.h:2417
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2389
~VPReductionPHIRecipe() override=default
bool isInLoop() const
Returns true, if the phi is part of an in-loop reduction.
Definition: VPlan.h:2420
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2399
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Definition: VPlan.h:2412
A recipe to represent inloop reduction operations, performing a reduction on a vector operand into a ...
Definition: VPlan.h:2587
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition: VPlan.h:2650
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2620
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition: VPlan.h:2654
const RecurrenceDescriptor & getRecurrenceDescriptor() const
Return the recurrence descriptor for the in-loop reduction.
Definition: VPlan.h:2644
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPReductionRecipe(const RecurrenceDescriptor &R, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2606
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition: VPlan.h:2656
VPReductionRecipe(const unsigned char SC, const RecurrenceDescriptor &R, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, bool IsOrdered)
Definition: VPlan.h:2595
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition: VPlan.h:2648
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition: VPlan.h:2652
VPReductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2615
void execute(VPTransformState &State) override
Generate the reduction in the loop.
static bool classof(const VPUser *U)
Definition: VPlan.h:2625
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition: VPlan.h:3657
VPRegionBlock * clone() override
Clone all blocks in the single-entry single-exit region of the block and their recipes without updati...
Definition: VPlan.cpp:702
const VPBlockBase * getEntry() const
Definition: VPlan.h:3696
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition: VPlan.h:3728
void dropAllReferences(VPValue *NewValue) override
Replace all operands of VPUsers in the block with NewValue and also replaces all uses of VPValues def...
Definition: VPlan.cpp:711
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3713
VPBlockBase * getExiting()
Definition: VPlan.h:3709
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition: VPlan.h:3701
InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override
Return the cost of the block.
Definition: VPlan.cpp:772
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print this VPRegionBlock to O (recursively), prefixing all lines with Indent.
Definition: VPlan.cpp:810
VPRegionBlock(const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3679
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="", bool IsReplicator=false)
Definition: VPlan.h:3670
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPRegionBlock,...
Definition: VPlan.cpp:718
const VPBlockBase * getExiting() const
Definition: VPlan.h:3708
VPBlockBase * getEntry()
Definition: VPlan.h:3697
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition: VPlan.h:3721
~VPRegionBlock() override
Definition: VPlan.h:3683
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition: VPlan.h:3692
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition: VPlan.h:2706
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
~VPReplicateRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2755
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition: VPlan.h:2762
bool isUniform() const
Definition: VPlan.h:2750
bool isPredicated() const
Definition: VPlan.h:2752
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2725
VPReplicateRecipe(Instruction *I, iterator_range< IterT > Operands, bool IsUniform, VPValue *Mask=nullptr)
Definition: VPlan.h:2715
unsigned getOpcode() const
Definition: VPlan.h:2779
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition: VPlan.h:2774
bool shouldPack() const
Returns true if the recipe is used by a widened recipe via an intervening VPPredInstPHIRecipe.
A recipe to compute the pointers for widened memory accesses of IndexedTy in reverse order.
Definition: VPlan.h:1900
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReverseVectorPointerRecipe.
Definition: VPlan.h:1924
VPReverseVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1938
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1931
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1917
VPReverseVectorPointerRecipe(VPValue *Ptr, VPValue *VF, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1904
const VPValue * getVFValue() const
Definition: VPlan.h:1913
VPScalarCastRecipe is a recipe to create scalar cast instructions.
Definition: VPlan.h:1581
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarCastRecipe.
Definition: VPlan.h:1604
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Each concrete VPDef prints itself.
~VPScalarCastRecipe() override=default
VPScalarCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1595
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1618
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1616
VPScalarCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1589
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
Definition: VPlan.h:3413
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3460
VPValue * getStepValue() const
Definition: VPlan.h:3457
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition: VPlan.h:3445
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step)
Definition: VPlan.h:3423
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3433
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, Instruction::BinaryOps Opcode, FastMathFlags FMFs)
Definition: VPlan.h:3417
~VPScalarIVStepsRecipe() override=default
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Recipe to generate a scalar PHI.
Definition: VPlan.h:2250
VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL, StringRef Name)
Definition: VPlan.h:2254
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2273
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPScalarPHIRecipe() override=default
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPScalarPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2263
VPSingleDefRecipe is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition: VPlan.h:847
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL={})
Definition: VPlan.h:853
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition: VPlan.h:916
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:862
const Instruction * getUnderlyingInstr() const
Definition: VPlan.h:919
VPSingleDefRecipe(const unsigned char SC, IterT Operands, DebugLoc DL={})
Definition: VPlan.h:850
static bool classof(const VPUser *U)
Definition: VPlan.h:908
LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe(const unsigned char SC, IterT Operands, Value *UV, DebugLoc DL={})
Definition: VPlan.h:858
virtual VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class can be used to assign names to VPValues.
Definition: VPlanValue.h:440
An analysis for type-inference for VPValues.
Definition: VPlanAnalysis.h:40
Helper to access the operand that contains the unroll part for this recipe after unrolling.
Definition: VPlan.h:1182
VPValue * getUnrollPartOperand(VPUser &U) const
Return the VPValue operand containing the unroll part or null if there is no such operand.
unsigned getUnrollPart(VPUser &U) const
Return the unroll part.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition: VPlanValue.h:200
operand_range operands()
Definition: VPlanValue.h:257
void setOperand(unsigned I, VPValue *New)
Definition: VPlanValue.h:242
unsigned getNumOperands() const
Definition: VPlanValue.h:236
operand_iterator op_end()
Definition: VPlanValue.h:255
operand_iterator op_begin()
Definition: VPlanValue.h:253
VPValue * getOperand(unsigned N) const
Definition: VPlanValue.h:237
VPUser()=delete
void addOperand(VPValue *Operand)
Definition: VPlanValue.h:231
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop region.
Definition: VPlan.cpp:1417
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition: VPlan.cpp:123
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition: VPlanValue.h:77
unsigned getNumUsers() const
Definition: VPlanValue.h:111
Value * getLiveInIRValue()
Returns the underlying IR value, if this VPValue is defined outside the scope of VPlan.
Definition: VPlanValue.h:172
bool isLiveIn() const
Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
Definition: VPlanValue.h:167
friend class VPRecipeBase
Definition: VPlanValue.h:52
user_range users()
Definition: VPlanValue.h:132
A recipe to compute the pointers for widened memory accesses of IndexedTy.
Definition: VPlan.h:1953
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPVectorPointerRecipe(VPValue *Ptr, Type *IndexedTy, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition: VPlan.h:1957
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool onlyFirstPartUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition: VPlan.h:1974
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition: VPlan.h:1967
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition: VPlan.h:1987
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1981
A recipe for widening Call instructions using library calls.
Definition: VPlan.h:1716
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
const_operand_range arg_operands() const
Definition: VPlan.h:1756
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1735
Function * getCalledScalarFunction() const
Definition: VPlan.h:1749
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
operand_range arg_operands()
Definition: VPlan.h:1753
~VPWidenCallRecipe() override=default
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, DebugLoc DL={})
Definition: VPlan.h:1723
A Recipe for widening the canonical induction variable of the vector loop.
Definition: VPlan.h:3308
void execute(VPTransformState &State) override
Generate a canonical vector induction variable of the vector loop, with start = {<Part*VF,...
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenCanonicalIVRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition: VPlan.h:3328
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3315
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
Definition: VPlan.h:3310
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition: VPlan.h:1529
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst &UI)
Definition: VPlan.h:1537
Instruction::CastOps getOpcode() const
Definition: VPlan.h:1574
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Returns the result type of the cast.
Definition: VPlan.h:1577
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
Definition: VPlan.h:1545
void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1551
A recipe for widening operations with vector-predication intrinsics with explicit vector length (EVL)...
Definition: VPlan.h:1482
const VPValue * getEVL() const
Definition: VPlan.h:1506
~VPWidenEVLRecipe() override=default
VPWidenEVLRecipe(Instruction &I, iterator_range< IterT > Operands, VPValue &EVL)
Definition: VPlan.h:1487
VPWidenRecipe * clone() override final
Clone the current recipe.
Definition: VPlan.h:1498
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override final
Print the recipe.
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC)
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
Definition: VPlan.h:1491
void execute(VPTransformState &State) override final
Produce a vp-intrinsic using the opcode and operands of the recipe, processing EVL elements.
VPValue * getEVL()
Definition: VPlan.h:1505
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:1513
A recipe for handling GEP instructions.
Definition: VPlan.h:1851
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the gep nodes.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition: VPlan.h:1884
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1873
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range< IterT > Operands)
Definition: VPlan.h:1868
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
Definition: VPlan.h:2085
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition: VPlan.h:2112
PHINode * getPHINode() const
Definition: VPlan.h:2107
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2089
VPValue * getStepValue()
Returns the step value of the induction.
Definition: VPlan.h:2104
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition: VPlan.h:2110
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition: VPlan.h:2119
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2096
const VPValue * getStepValue() const
Definition: VPlan.h:2105
virtual void execute(VPTransformState &State) override=0
Generate the phi nodes.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition: VPlan.h:2129
const TruncInst * getTruncInst() const
Definition: VPlan.h:2183
const VPValue * getVFValue() const
Definition: VPlan.h:2172
~VPWidenIntOrFpInductionRecipe() override=default
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, DebugLoc DL)
Definition: VPlan.h:2142
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2153
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition: VPlan.h:2182
void execute(VPTransformState &State) override
Generate the vectorized and scalarized versions of the phi node as needed by their users.
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition: VPlan.h:2133
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition: VPlan.h:2198
Type * getScalarType() const
Returns the scalar type of the induction.
Definition: VPlan.h:2191
bool isCanonical() const
Returns true if the induction is canonical.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition: VPlan.h:1627
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, std::initializer_list< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1668
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition: VPlan.h:1698
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1653
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition: VPlan.h:1704
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1676
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition: VPlan.h:1701
~VPWidenIntrinsicRecipe() override=default
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getResultType() const
Return the scalar return type of the intrinsic.
Definition: VPlan.h:1692
void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, DebugLoc DL={})
Definition: VPlan.h:1644
A common base class for widening memory operations.
Definition: VPlan.h:2879
bool IsMasked
Whether the memory access is masked.
Definition: VPlan.h:2890
bool Reverse
Whether the consecutive accessed addresses are in reverse order.
Definition: VPlan.h:2887
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition: VPlan.h:2926
static bool classof(const VPUser *U)
Definition: VPlan.h:2920
void execute(VPTransformState &State) override
Generate the wide load/store.
Definition: VPlan.h:2946
Instruction & Ingredient
Definition: VPlan.h:2881
VPWidenMemoryRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2909
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition: VPlan.h:2954
bool Consecutive
Whether the accessed addresses are consecutive.
Definition: VPlan.h:2884
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:2913
VPWidenMemoryRecipe(const char unsigned SC, Instruction &I, std::initializer_list< VPValue * > Operands, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2900
VPValue * getMask() const
Return the mask used by this recipe.
Definition: VPlan.h:2940
bool isMasked() const
Returns true if the recipe is masked.
Definition: VPlan.h:2936
void setMask(VPValue *Mask)
Definition: VPlan.h:2892
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition: VPlan.h:2933
bool isReverse() const
Return whether the consecutive loaded/stored addresses are in reverse order.
Definition: VPlan.h:2930
A recipe for handling phis that are widened in the vector loop.
Definition: VPlan.h:2289
void addIncoming(VPValue *IncomingV, VPBasicBlock *IncomingBlock)
Adds a pair (IncomingV, IncomingBlock) to the phi.
Definition: VPlan.h:2319
VPValue * getIncomingValue(unsigned I)
Returns the I th incoming VPValue.
Definition: VPlan.h:2328
VPWidenPHIRecipe(PHINode *Phi, VPValue *Start=nullptr)
Create a new VPWidenPHIRecipe for Phi with start value Start.
Definition: VPlan.h:2295
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2301
~VPWidenPHIRecipe() override=default
VPBasicBlock * getIncomingBlock(unsigned I)
Returns the I th incoming VPBasicBlock.
Definition: VPlan.h:2325
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2219
~VPWidenPointerInductionRecipe() override=default
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, bool IsScalarAfterVectorization, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start.
Definition: VPlan.h:2210
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
VPValue * getFirstUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the first unrolled part,...
Definition: VPlan.h:2236
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition: VPlan.h:1431
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
VPWidenRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1447
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenRecipe() override=default
VPWidenRecipe(Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1442
unsigned getOpcode() const
Definition: VPlan.h:1471
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1436
static bool classof(const VPRecipeBase *R)
Definition: VPlan.h:1453
static bool classof(const VPUser *U)
Definition: VPlan.h:1458
VPlanPrinter prints a given VPlan to a given output stream.
Definition: VPlan.h:4036
VPlanPrinter(raw_ostream &O, const VPlan &P)
Definition: VPlan.h:4077
LLVM_DUMP_METHOD void dump()
Definition: VPlan.cpp:1282
Class that maps (parts of) an existing VPlan to trees of combined VPInstructions.
Definition: VPlan.h:4287
VPInstruction * buildGraph(ArrayRef< VPValue * > Operands)
Tries to build an SLP tree rooted at Operands and returns a VPInstruction combining Operands,...
Definition: VPlanSLP.cpp:359
bool isCompletelySLP() const
Return true if all visited instruction can be combined.
Definition: VPlan.h:4377
~VPlanSlp()=default
VPlanSlp(VPInterleavedAccessInfo &IAI, VPBasicBlock &BB)
Definition: VPlan.h:4365
unsigned getWidestBundleBits() const
Return the width of the widest combined bundle in bits.
Definition: VPlan.h:4374
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition: VPlan.h:3761
void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition: VPlan.cpp:1177
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition: VPlan.cpp:1153
void prepareToExecute(Value *TripCount, Value *VectorTripCount, VPTransformState &State)
Prepare the plan for execution, setting up the required live-in values.
Definition: VPlan.cpp:930
bool hasScalableVF()
Definition: VPlan.h:3944
VPBasicBlock * getEntry()
Definition: VPlan.h:3869
VPValue & getVectorTripCount()
The vector trip count.
Definition: VPlan.h:3927
void setName(const Twine &newName)
Definition: VPlan.h:3972
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition: VPlan.h:3933
VPValue & getVF()
Returns the VF of the vector loop region.
Definition: VPlan.h:3930
VPValue * getTripCount() const
The trip count of the original loop.
Definition: VPlan.h:3906
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition: VPlan.h:3920
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition: VPlan.h:3950
const VPBasicBlock * getEntry() const
Definition: VPlan.h:3870
unsigned getUF() const
Definition: VPlan.h:3958
static VPlanPtr createInitialVPlan(Type *InductionTy, PredicatedScalarEvolution &PSE, bool RequiresScalarEpilogueCheck, bool TailFolded, Loop *TheLoop)
Create initial VPlan, having an "entry" VPBasicBlock (wrapping original scalar pre-header) which cont...
Definition: VPlan.cpp:851
VPlan(VPBasicBlock *Entry, VPValue *TC, VPIRBasicBlock *ScalarHeader)
Construct a VPlan with Entry entering the plan, trip count TC and with ScalarHeader wrapping the orig...
Definition: VPlan.h:3827
bool hasVF(ElementCount VF)
Definition: VPlan.h:3943
void addSCEVExpansion(const SCEV *S, VPValue *V)
Definition: VPlan.h:4023
VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader)
Construct a VPlan with Entry to the plan and with ScalarHeader wrapping the original header of the sc...
Definition: VPlan.h:3818
bool hasUF(unsigned UF) const
Definition: VPlan.h:3956
void setVF(ElementCount VF)
Definition: VPlan.h:3937
auto getExitBlocks()
Return an iterator range over the VPIRBasicBlock wrapping the exit blocks of the VPlan,...
Definition: VPlanCFG.h:309
VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition: VPlan.cpp:1084
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition: VPlan.cpp:1078
const VPBasicBlock * getMiddleBlock() const
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition: VPlan.h:3884
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition: VPlan.h:3913
VPBasicBlock * getMiddleBlock()
Definition: VPlan.h:3887
void setEntry(VPBasicBlock *VPBB)
Definition: VPlan.h:3839
VPValue * getOrAddLiveIn(Value *V)
Gets the live-in VPValue for V or adds a new live-in (if none exists yet) for V.
Definition: VPlan.h:3976
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition: VPlan.cpp:1183
bool hasScalarVFOnly() const
Definition: VPlan.h:3954
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition: VPlan.h:3892
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition: VPlan.cpp:977
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the vector loop.
Definition: VPlan.h:4010
void print(raw_ostream &O) const
Print this VPlan to O.
Definition: VPlan.cpp:1136
void addVF(ElementCount VF)
Definition: VPlan.h:3935
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition: VPlan.h:3897
VPValue * getLiveIn(Value *V) const
Return the live-in VPValue for V, if there is one or nullptr otherwise.
Definition: VPlan.h:3993
VPValue * getSCEVExpansion(const SCEV *S) const
Definition: VPlan.h:4019
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition: VPlan.cpp:1100
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region.
Definition: VPlan.h:3873
void setUF(unsigned UF)
Definition: VPlan.h:3963
VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition: VPlan.cpp:1224
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
An ilist node that can access its parent list.
Definition: ilist_node.h:321
base_list_type::const_reverse_iterator const_reverse_iterator
Definition: ilist.h:125
void pop_back()
Definition: ilist.h:255
base_list_type::reverse_iterator reverse_iterator
Definition: ilist.h:123
base_list_type::const_iterator const_iterator
Definition: ilist.h:122
iterator insert(iterator where, pointer New)
Definition: ilist.h:165
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
Definition: iterator.h:80
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
AttributeList getAttributes(LLVMContext &C, ID id)
Return the attributes for an intrinsic.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1759
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition: Casting.h:720
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
auto dyn_cast_or_null(const Y &Val)
Definition: Casting.h:759
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
std::unique_ptr< VPlan > VPlanPtr
Definition: VPlan.h:144
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1299
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and returns a new filter_iterator...
Definition: STLExtras.h:573
@ Other
Any other memory.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
Value * createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, int64_t Step)
Return a value for Step multiplied by VF.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
unsigned getReciprocalPredBlockProb()
A helper function that returns the reciprocal of the block probability of predicated blocks.
Definition: VPlan.h:92
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:468
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Definition: VPlan.h:97
iterator end()
Definition: VPlan.h:138
const ElementCount Start
Definition: VPlan.h:99
ElementCount End
Definition: VPlan.h:102
iterator begin()
Definition: VPlan.h:137
bool isEmpty() const
Definition: VPlan.h:104
VFRange(const ElementCount &Start, const ElementCount &End)
Definition: VPlan.h:108
Struct to hold various analysis needed for cost computations.
Definition: VPlan.h:688
LLVMContext & LLVMCtx
Definition: VPlan.h:692
LoopVectorizationCostModel & CM
Definition: VPlan.h:693
TargetTransformInfo::OperandValueInfo getOperandInfo(VPValue *V) const
Returns the OperandInfo for V, if it is a live-in.
Definition: VPlan.cpp:1667
bool skipCostComputation(Instruction *UI, bool IsVector) const
Return true if the cost for UI shouldn't be computed.
InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const
Return the cost for UI with VF using the legacy cost model as fallback until computing the cost of al...
VPTypeAnalysis Types
Definition: VPlan.h:691
const TargetLibraryInfo & TLI
Definition: VPlan.h:690
VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, Type *CanIVTy, LoopVectorizationCostModel &CM)
Definition: VPlan.h:696
const TargetTransformInfo & TTI
Definition: VPlan.h:689
SmallPtrSet< Instruction *, 8 > SkipCostComputation
Definition: VPlan.h:694
A recipe for handling first-order recurrence phis.
Definition: VPlan.h:2334
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2344
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start)
Definition: VPlan.h:2335
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
static bool classof(const VPHeaderPHIRecipe *R)
Definition: VPlan.h:2340
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition: VPlan.h:947
Hold state information used when constructing the CFG of the output IR, traversing the VPBasicBlocks ...
Definition: VPlan.h:337
BasicBlock * PrevBB
The previous IR BasicBlock created or used.
Definition: VPlan.h:343
SmallDenseMap< VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition: VPlan.h:351
VPBasicBlock * PrevVPBB
The previous VPBasicBlock visited. Initially set to null.
Definition: VPlan.h:339
BasicBlock * ExitBB
The last IR BasicBlock in the output IR.
Definition: VPlan.h:347
CFGState(DominatorTree *DT)
Definition: VPlan.h:356
BasicBlock * getPreheaderBBFor(VPRecipeBase *R)
Returns the BasicBlock* mapped to the pre-header of the loop region containing R.
Definition: VPlan.cpp:352
DomTreeUpdater DTU
Updater for the DominatorTree.
Definition: VPlan.h:354
DenseMap< VPValue *, Value * > VPV2Vector
Definition: VPlan.h:254
DenseMap< VPValue *, SmallVector< Value *, 4 > > VPV2Scalars
Definition: VPlan.h:256
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition: VPlan.h:236
bool hasScalarValue(VPValue *Def, VPLane Lane)
Definition: VPlan.h:268
bool hasVectorValue(VPValue *Def)
Definition: VPlan.h:266
LoopInfo * LI
Hold a pointer to LoopInfo to register new basic blocks in the loop.
Definition: VPlan.h:365
DenseMap< const SCEV *, Value * > ExpandedSCEVs
Map SCEVs to their expanded values.
Definition: VPlan.h:388
VPTypeAnalysis TypeAnalysis
VPlan-based type analysis.
Definition: VPlan.h:391
struct llvm::VPTransformState::DataState Data
void addMetadata(Value *To, Instruction *From)
Add metadata from one instruction to another.
Definition: VPlan.cpp:365
void packScalarIntoVectorValue(VPValue *Def, const VPLane &Lane)
Construct the vector value of a scalarized value V one lane at a time.
Definition: VPlan.cpp:398
Value * get(VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition: VPlan.cpp:253
struct llvm::VPTransformState::CFGState CFG
LoopVersioning * LVer
LoopVersioning.
Definition: VPlan.h:384
void addNewMetadata(Instruction *To, const Instruction *Orig)
Add additional metadata to To that was not present on Orig.
Definition: VPlan.cpp:357
void reset(VPValue *Def, Value *V, const VPLane &Lane)
Reset an existing scalar value for Def and a given Lane.
Definition: VPlan.h:305
std::optional< VPLane > Lane
Hold the index to generate specific scalar instructions.
Definition: VPlan.h:249
void set(VPValue *Def, Value *V, const VPLane &Lane)
Set the generated scalar V for Def and the given Lane.
Definition: VPlan.h:295
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition: VPlan.h:368
const TargetTransformInfo * TTI
Target Transform Info.
Definition: VPlan.h:241
VPlan * Plan
Pointer to the VPlan for which code is generated.
Definition: VPlan.h:374
InnerLoopVectorizer * ILV
Hold a pointer to InnerLoopVectorizer to reuse its IR generation methods.
Definition: VPlan.h:371
void reset(VPValue *Def, Value *V)
Reset an existing vector value for Def.
Definition: VPlan.h:289
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition: VPlan.h:244
Loop * CurrentVectorLoop
The loop object for the current parent region, or nullptr.
Definition: VPlan.h:377
void setDebugLocFrom(DebugLoc DL)
Set the debug location in the builder using the debug location DL.
Definition: VPlan.cpp:376
void set(VPValue *Def, Value *V, bool IsScalar=false)
Set the generated vector Value for a given VPValue, if IsScalar is false.
Definition: VPlan.h:278
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition: VPlan.h:2998
void execute(VPTransformState &State) override
Generate the wide load or gather.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3010
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3026
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:2999
A recipe for widening load operations, using the address to load from and an optional mask.
Definition: VPlan.h:2959
VP_CLASSOF_IMPL(VPDef::VPWidenLoadSC)
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:2960
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:2986
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:2968
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening select instructions.
Definition: VPlan.h:1813
bool isInvariantCond() const
Definition: VPlan.h:1845
VPWidenSelectRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:1821
VPWidenSelectRecipe(SelectInst &I, iterator_range< IterT > Operands)
Definition: VPlan.h:1815
VPValue * getCond() const
Definition: VPlan.h:1841
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenSelectRecipe.
void execute(VPTransformState &State) override
Produce a widened version of the select instruction.
~VPWidenSelectRecipe() override=default
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition: VPlan.h:3078
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3089
void execute(VPTransformState &State) override
Generate the wide store or scatter.
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3108
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue &EVL, VPValue *Mask)
Definition: VPlan.h:3079
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition: VPlan.h:3092
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition: VPlan.h:3037
void execute(VPTransformState &State) override
Generate a wide store or scatter.
bool onlyFirstLaneUsed(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition: VPlan.h:3066
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, bool Reverse, DebugLoc DL)
Definition: VPlan.h:3038
VP_CLASSOF_IMPL(VPDef::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition: VPlan.h:3054
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition: VPlan.h:3045
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPlanIngredient(const Value *V)
Definition: VPlan.h:4086
const Value * V
Definition: VPlan.h:4084
void print(raw_ostream &O) const
Definition: VPlan.cpp:1398